summaryrefslogtreecommitdiff
path: root/servers/rendering
diff options
context:
space:
mode:
authorreduz <reduzio@gmail.com>2021-01-17 13:25:38 -0300
committerJuan Linietsky <reduzio@gmail.com>2021-01-19 23:31:06 +0100
commit099dee35f47db3e293cb8e60287ffe6a44f3d5d4 (patch)
treedea148899efa156adf4c7b9ff32464871cef4253 /servers/rendering
parent7008e3c6eafa374e5d64ee7867608abe696698c2 (diff)
Added GPU based cluster builder
Clustering is now GPU based, uses an implementation based on the Activision algorithm.
Diffstat (limited to 'servers/rendering')
-rw-r--r--servers/rendering/renderer_rd/cluster_builder_rd.cpp550
-rw-r--r--servers/rendering/renderer_rd/cluster_builder_rd.h378
-rw-r--r--servers/rendering/renderer_rd/light_cluster_builder.cpp252
-rw-r--r--servers/rendering/renderer_rd/light_cluster_builder.h290
-rw-r--r--servers/rendering/renderer_rd/renderer_scene_render_forward.cpp132
-rw-r--r--servers/rendering/renderer_rd/renderer_scene_render_forward.h11
-rw-r--r--servers/rendering/renderer_rd/renderer_scene_render_rd.cpp679
-rw-r--r--servers/rendering/renderer_rd/renderer_scene_render_rd.h67
-rw-r--r--servers/rendering/renderer_rd/renderer_storage_rd.cpp1
-rw-r--r--servers/rendering/renderer_rd/renderer_storage_rd.h15
-rw-r--r--servers/rendering/renderer_rd/shaders/SCsub3
-rw-r--r--servers/rendering/renderer_rd/shaders/cluster_debug.glsl115
-rw-r--r--servers/rendering/renderer_rd/shaders/cluster_render.glsl168
-rw-r--r--servers/rendering/renderer_rd/shaders/cluster_store.glsl119
-rw-r--r--servers/rendering/renderer_rd/shaders/gi.glsl17
-rw-r--r--servers/rendering/renderer_rd/shaders/scene_forward.glsl536
-rw-r--r--servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl73
-rw-r--r--servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl141
-rw-r--r--servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl125
-rw-r--r--servers/rendering/renderer_rd/shaders/volumetric_fog.glsl246
-rw-r--r--servers/rendering/renderer_scene_cull.cpp17
-rw-r--r--servers/rendering/rendering_device.cpp5
-rw-r--r--servers/rendering/rendering_device.h5
23 files changed, 2610 insertions, 1335 deletions
diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp
new file mode 100644
index 0000000000..8d9cff0f43
--- /dev/null
+++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp
@@ -0,0 +1,550 @@
+/*************************************************************************/
+/* cluster_builder_rd.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
+#include "cluster_builder_rd.h"
+#include "servers/rendering/rendering_device.h"
+#include "servers/rendering/rendering_server_globals.h"
+
+ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() { // Builds the pipelines and proxy meshes (sphere, cone, box) that ClusterBuilderRD reads through its shared pointer.
+ RD::VertexFormatID vertex_format;
+ // Position-only layout: a single 3-float attribute per vertex, used by all three meshes below.
+ {
+ Vector<RD::VertexAttribute> attributes;
+ {
+ RD::VertexAttribute va;
+ va.format = RD::DATA_FORMAT_R32G32B32_SFLOAT;
+ va.stride = sizeof(float) * 3;
+ attributes.push_back(va);
+ }
+ vertex_format = RD::get_singleton()->vertex_format_create(attributes);
+ }
+ // Render pipelines for the cluster_render shader: both use an empty framebuffer format; the MSAA variant sets sample_count to 4.
+ {
+ Vector<String> versions;
+ versions.push_back("");
+ cluster_render.cluster_render_shader.initialize(versions);
+ cluster_render.shader_version = cluster_render.cluster_render_shader.version_create();
+ cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0);
+ cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
+ RD::PipelineMultisampleState ms;
+ ms.sample_count = RD::TEXTURE_SAMPLES_4;
+ cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
+ }
+ {
+ Vector<String> versions;
+ versions.push_back("");
+ cluster_store.cluster_store_shader.initialize(versions);
+ cluster_store.shader_version = cluster_store.cluster_store_shader.version_create();
+ cluster_store.shader = cluster_store.cluster_store_shader.version_get_shader(cluster_store.shader_version, 0);
+ cluster_store.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_store.shader);
+ }
+ {
+ Vector<String> versions;
+ versions.push_back("");
+ cluster_debug.cluster_debug_shader.initialize(versions);
+ cluster_debug.shader_version = cluster_debug.cluster_debug_shader.version_create();
+ cluster_debug.shader = cluster_debug.cluster_debug_shader.version_get_shader(cluster_debug.shader_version, 0);
+ cluster_debug.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_debug.shader);
+ }
+
+ { // SPHERE
+ static const uint32_t icosphere_vertex_count = 42;
+ static const float icosphere_vertices[icosphere_vertex_count * 3] = {
+ 0, 0, -1, 0.7236073, -0.5257253, -0.4472195, -0.276388, -0.8506492, -0.4472199, -0.8944262, 0, -0.4472156, -0.276388, 0.8506492, -0.4472199, 0.7236073, 0.5257253, -0.4472195, 0.276388, -0.8506492, 0.4472199, -0.7236073, -0.5257253, 0.4472195, -0.7236073, 0.5257253, 0.4472195, 0.276388, 0.8506492, 0.4472199, 0.8944262, 0, 0.4472156, 0, 0, 1, -0.1624555, -0.4999952, -0.8506544, 0.4253227, -0.3090114, -0.8506542, 0.2628688, -0.8090116, -0.5257377, 0.8506479, 0, -0.5257359, 0.4253227, 0.3090114, -0.8506542, -0.5257298, 0, -0.8506517, -0.6881894, -0.4999969, -0.5257362, -0.1624555, 0.4999952, -0.8506544, -0.6881894, 0.4999969, -0.5257362, 0.2628688, 0.8090116, -0.5257377, 0.9510579, -0.3090126, 0, 0.9510579, 0.3090126, 0, 0, -1, 0, 0.5877856, -0.8090167, 0, -0.9510579, -0.3090126, 0, -0.5877856, -0.8090167, 0, -0.5877856, 0.8090167, 0, -0.9510579, 0.3090126, 0, 0.5877856, 0.8090167, 0, 0, 1, 0, 0.6881894, -0.4999969, 0.5257362, -0.2628688, -0.8090116, 0.5257377, -0.8506479, 0, 0.5257359, -0.2628688, 0.8090116, 0.5257377, 0.6881894, 0.4999969, 0.5257362, 0.1624555, -0.4999952, 0.8506544, 0.5257298, 0, 0.8506517, -0.4253227, -0.3090114, 0.8506542, -0.4253227, 0.3090114, 0.8506542, 0.1624555, 0.4999952, 0.8506544
+ };
+ static const uint32_t icosphere_triangle_count = 80;
+ static const uint32_t icosphere_triangle_indices[icosphere_triangle_count * 3] = {
+ 0, 13, 12, 1, 13, 15, 0, 12, 17, 0, 17, 19, 0, 19, 16, 1, 15, 22, 2, 14, 24, 3, 18, 26, 4, 20, 28, 5, 21, 30, 1, 22, 25, 2, 24, 27, 3, 26, 29, 4, 28, 31, 5, 30, 23, 6, 32, 37, 7, 33, 39, 8, 34, 40, 9, 35, 41, 10, 36, 38, 38, 41, 11, 38, 36, 41, 36, 9, 41, 41, 40, 11, 41, 35, 40, 35, 8, 40, 40, 39, 11, 40, 34, 39, 34, 7, 39, 39, 37, 11, 39, 33, 37, 33, 6, 37, 37, 38, 11, 37, 32, 38, 32, 10, 38, 23, 36, 10, 23, 30, 36, 30, 9, 36, 31, 35, 9, 31, 28, 35, 28, 8, 35, 29, 34, 8, 29, 26, 34, 26, 7, 34, 27, 33, 7, 27, 24, 33, 24, 6, 33, 25, 32, 6, 25, 22, 32, 22, 10, 32, 30, 31, 9, 30, 21, 31, 21, 4, 31, 28, 29, 8, 28, 20, 29, 20, 3, 29, 26, 27, 7, 26, 18, 27, 18, 2, 27, 24, 25, 6, 24, 14, 25, 14, 1, 25, 22, 23, 10, 22, 15, 23, 15, 5, 23, 16, 21, 5, 16, 19, 21, 19, 4, 21, 19, 20, 4, 19, 17, 20, 17, 3, 20, 17, 18, 3, 17, 12, 18, 12, 2, 18, 15, 16, 5, 15, 13, 16, 13, 0, 16, 12, 14, 2, 12, 13, 14, 13, 1, 14
+ };
+
+ Vector<uint8_t> vertex_data;
+ vertex_data.resize(sizeof(float) * icosphere_vertex_count * 3);
+ copymem(vertex_data.ptrw(), icosphere_vertices, vertex_data.size());
+
+ sphere_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data);
+
+ Vector<uint8_t> index_data;
+ index_data.resize(sizeof(uint32_t) * icosphere_triangle_count * 3);
+ copymem(index_data.ptrw(), icosphere_triangle_indices, index_data.size());
+
+ sphere_index_buffer = RD::get_singleton()->index_buffer_create(icosphere_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data);
+
+ Vector<RID> buffers;
+ buffers.push_back(sphere_vertex_buffer);
+
+ sphere_vertex_array = RD::get_singleton()->vertex_array_create(icosphere_vertex_count, vertex_format, buffers);
+
+ sphere_index_array = RD::get_singleton()->index_array_create(sphere_index_buffer, 0, icosphere_triangle_count * 3);
+ // sphere_overfit is the inverse of the smallest |plane.d| over all faces, i.e. the enlargement the flat faces need (see sphere_overfit in the header).
+ float min_d = 1e20;
+ for (uint32_t i = 0; i < icosphere_triangle_count; i++) {
+ Vector3 vertices[3];
+ for (uint32_t j = 0; j < 3; j++) {
+ uint32_t index = icosphere_triangle_indices[i * 3 + j];
+ for (uint32_t k = 0; k < 3; k++) {
+ vertices[j][k] = icosphere_vertices[index * 3 + k];
+ }
+ }
+ Plane p(vertices[0], vertices[1], vertices[2]);
+ min_d = MIN(Math::abs(p.d), min_d);
+ }
+ sphere_overfit = 1.0 / min_d;
+ }
+
+ { // CONE
+ static const uint32_t cone_vertex_count = 33; // 32 base-ring vertices plus the apex at the origin (index 23); was 99, which is the float count and tripled the buffer with zero-filled vertices
+ static const float cone_vertices[cone_vertex_count * 3] = {
+ 0, 1, -1, 0.1950903, 0.9807853, -1, 0.3826835, 0.9238795, -1, 0.5555703, 0.8314696, -1, 0.7071068, 0.7071068, -1, 0.8314697, 0.5555702, -1, 0.9238795, 0.3826834, -1, 0.9807853, 0.1950903, -1, 1, 0, -1, 0.9807853, -0.1950902, -1, 0.9238796, -0.3826833, -1, 0.8314697, -0.5555702, -1, 0.7071068, -0.7071068, -1, 0.5555702, -0.8314697, -1, 0.3826833, -0.9238796, -1, 0.1950901, -0.9807853, -1, -3.25841e-7, -1, -1, -0.1950907, -0.9807852, -1, -0.3826839, -0.9238793, -1, -0.5555707, -0.8314693, -1, -0.7071073, -0.7071063, -1, -0.83147, -0.5555697, -1, -0.9238799, -0.3826827, -1, 0, 0, 0, -0.9807854, -0.1950894, -1, -1, 9.65599e-7, -1, -0.9807851, 0.1950913, -1, -0.9238791, 0.3826845, -1, -0.8314689, 0.5555713, -1, -0.7071059, 0.7071077, -1, -0.5555691, 0.8314704, -1, -0.3826821, 0.9238801, -1, -0.1950888, 0.9807856, -1
+ };
+ static const uint32_t cone_triangle_count = 62;
+ static const uint32_t cone_triangle_indices[cone_triangle_count * 3] = {
+ 0, 23, 1, 1, 23, 2, 2, 23, 3, 3, 23, 4, 4, 23, 5, 5, 23, 6, 6, 23, 7, 7, 23, 8, 8, 23, 9, 9, 23, 10, 10, 23, 11, 11, 23, 12, 12, 23, 13, 13, 23, 14, 14, 23, 15, 15, 23, 16, 16, 23, 17, 17, 23, 18, 18, 23, 19, 19, 23, 20, 20, 23, 21, 21, 23, 22, 22, 23, 24, 24, 23, 25, 25, 23, 26, 26, 23, 27, 27, 23, 28, 28, 23, 29, 29, 23, 30, 30, 23, 31, 31, 23, 32, 32, 23, 0, 7, 15, 24, 32, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 3, 6, 7, 3, 7, 8, 9, 9, 10, 7, 10, 11, 7, 11, 12, 15, 12, 13, 15, 13, 14, 15, 15, 16, 17, 17, 18, 19, 19, 20, 24, 20, 21, 24, 21, 22, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 30, 31, 32, 32, 1, 3, 15, 17, 24, 17, 19, 24, 24, 26, 32, 26, 28, 32, 28, 30, 32, 32, 3, 7, 7, 11, 15, 32, 7, 24
+ };
+
+ Vector<uint8_t> vertex_data;
+ vertex_data.resize(sizeof(float) * cone_vertex_count * 3);
+ copymem(vertex_data.ptrw(), cone_vertices, vertex_data.size());
+
+ cone_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data);
+
+ Vector<uint8_t> index_data;
+ index_data.resize(sizeof(uint32_t) * cone_triangle_count * 3);
+ copymem(index_data.ptrw(), cone_triangle_indices, index_data.size());
+
+ cone_index_buffer = RD::get_singleton()->index_buffer_create(cone_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data);
+
+ Vector<RID> buffers;
+ buffers.push_back(cone_vertex_buffer);
+
+ cone_vertex_array = RD::get_singleton()->vertex_array_create(cone_vertex_count, vertex_format, buffers);
+
+ cone_index_array = RD::get_singleton()->index_array_create(cone_index_buffer, 0, cone_triangle_count * 3);
+ // Only faces touching the apex (the all-zero vertex) are measured: the plane through the base edge and parallel to Z gives that edge's |plane.d|.
+ float min_d = 1e20;
+ for (uint32_t i = 0; i < cone_triangle_count; i++) {
+ Vector3 vertices[3];
+ int32_t zero_index = -1;
+ for (uint32_t j = 0; j < 3; j++) {
+ uint32_t index = cone_triangle_indices[i * 3 + j];
+ for (uint32_t k = 0; k < 3; k++) {
+ vertices[j][k] = cone_vertices[index * 3 + k];
+ }
+ if (vertices[j] == Vector3()) {
+ zero_index = j;
+ }
+ }
+
+ if (zero_index != -1) {
+ Vector3 a = vertices[(zero_index + 1) % 3];
+ Vector3 b = vertices[(zero_index + 2) % 3];
+ Vector3 c = a + Vector3(0, 0, 1);
+ Plane p(a, b, c);
+ min_d = MIN(Math::abs(p.d), min_d);
+ }
+ }
+ cone_overfit = 1.0 / min_d;
+ }
+
+ { // BOX
+ static const uint32_t box_vertex_count = 8;
+ static const float box_vertices[box_vertex_count * 3] = {
+ -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, -1, 1, 1, 1
+ };
+ static const uint32_t box_triangle_count = 12;
+ static const uint32_t box_triangle_indices[box_triangle_count * 3] = {
+ 1, 2, 0, 3, 6, 2, 7, 4, 6, 5, 0, 4, 6, 0, 2, 3, 5, 7, 1, 3, 2, 3, 7, 6, 7, 5, 4, 5, 1, 0, 6, 4, 0, 3, 1, 5
+ };
+
+ Vector<uint8_t> vertex_data;
+ vertex_data.resize(sizeof(float) * box_vertex_count * 3);
+ copymem(vertex_data.ptrw(), box_vertices, vertex_data.size());
+
+ box_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data);
+
+ Vector<uint8_t> index_data;
+ index_data.resize(sizeof(uint32_t) * box_triangle_count * 3);
+ copymem(index_data.ptrw(), box_triangle_indices, index_data.size());
+
+ box_index_buffer = RD::get_singleton()->index_buffer_create(box_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data);
+
+ Vector<RID> buffers;
+ buffers.push_back(box_vertex_buffer);
+
+ box_vertex_array = RD::get_singleton()->vertex_array_create(box_vertex_count, vertex_format, buffers);
+
+ box_index_array = RD::get_singleton()->index_array_create(box_index_buffer, 0, box_triangle_count * 3);
+ }
+}
+ClusterBuilderSharedDataRD::~ClusterBuilderSharedDataRD() {
+ RD *device = RD::get_singleton(); // Release the mesh buffers first, then the three shader versions.
+ device->free(sphere_vertex_buffer);
+ device->free(sphere_index_buffer);
+ device->free(cone_vertex_buffer);
+ device->free(cone_index_buffer);
+ device->free(box_vertex_buffer);
+ device->free(box_index_buffer);
+ cluster_render.cluster_render_shader.version_free(cluster_render.shader_version);
+ cluster_store.cluster_store_shader.version_free(cluster_store.shader_version);
+ cluster_debug.cluster_debug_shader.version_free(cluster_debug.shader_version);
+}
+
+/////////////////////////////
+
+void ClusterBuilderRD::_clear() {
+ // setup() creates all of these together, so a valid cluster_buffer means there is something to release.
+ if (cluster_buffer.is_valid()) {
+ RD *device = RD::get_singleton();
+ device->free(cluster_buffer);
+ cluster_buffer = RID();
+ device->free(cluster_render_buffer);
+ cluster_render_buffer = RID();
+ device->free(element_buffer);
+ element_buffer = RID();
+
+ memfree(render_elements);
+ render_elements = nullptr;
+ render_element_max = 0;
+ render_element_count = 0;
+
+ device->free(framebuffer);
+ framebuffer = RID();
+ // The uniform set handles are only reset here, not freed explicitly.
+ cluster_render_uniform_set = RID();
+ cluster_store_uniform_set = RID();
+ }
+}
+
+void ClusterBuilderRD::setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer) {
+ ERR_FAIL_COND(p_max_elements == 0);
+ ERR_FAIL_COND(p_screen_size.x < 1);
+ ERR_FAIL_COND(p_screen_size.y < 1);
+ // (Re)creates every size-dependent resource; whatever a previous setup() created is released first.
+ _clear();
+
+ screen_size = p_screen_size;
+
+ cluster_screen_size.width = (p_screen_size.width - 1) / cluster_size + 1;
+ cluster_screen_size.height = (p_screen_size.height - 1) / cluster_size + 1;
+
+ max_elements_by_type = p_max_elements;
+ if (max_elements_by_type % 32) { //need to be 32 aligned
+ max_elements_by_type += 32 - (max_elements_by_type % 32);
+ }
+ // Per cluster and element type: max_elements_by_type / 32 words plus 32 more words (presumably depth-range data - confirm against cluster_store.glsl), 4 bytes each.
+ cluster_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (max_elements_by_type / 32 + 32) * ELEMENT_TYPE_MAX * 4;
+
+ render_element_max = max_elements_by_type * ELEMENT_TYPE_MAX;
+
+ uint32_t element_tag_bits_size = render_element_max / 32;
+ uint32_t element_tag_depth_bits_size = render_element_max;
+ cluster_render_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (element_tag_bits_size + element_tag_depth_bits_size) * 4; // tag bits (element was used) and tag depth (depth range in which it was used)
+
+ cluster_render_buffer = RD::get_singleton()->storage_buffer_create(cluster_render_buffer_size);
+ cluster_buffer = RD::get_singleton()->storage_buffer_create(cluster_buffer_size);
+
+ render_elements = (RenderElementData *)memalloc(sizeof(RenderElementData) * render_element_max); // element-sized, not pointer-sized: bake_cluster() uploads sizeof(RenderElementData) bytes per entry from this array
+ render_element_count = 0;
+
+ element_buffer = RD::get_singleton()->storage_buffer_create(sizeof(RenderElementData) * render_element_max);
+ // The empty framebuffer used for rasterizing elements is the screen size divided by 2^divisor.
+ uint32_t div_value = 1 << divisor;
+ if (use_msaa) {
+ framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value, RD::TEXTURE_SAMPLES_4);
+ } else {
+ framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value);
+ }
+
+ {
+ Vector<RD::Uniform> uniforms;
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
+ u.binding = 1;
+ u.ids.push_back(state_uniform);
+ uniforms.push_back(u);
+ }
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ u.binding = 2;
+ u.ids.push_back(element_buffer);
+ uniforms.push_back(u);
+ }
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ u.binding = 3;
+ u.ids.push_back(cluster_render_buffer);
+ uniforms.push_back(u);
+ }
+
+ cluster_render_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_render.shader, 0);
+ }
+
+ {
+ Vector<RD::Uniform> uniforms;
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ u.binding = 1;
+ u.ids.push_back(cluster_render_buffer);
+ uniforms.push_back(u);
+ }
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ u.binding = 2;
+ u.ids.push_back(cluster_buffer);
+ uniforms.push_back(u);
+ }
+
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ u.binding = 3;
+ u.ids.push_back(element_buffer);
+ uniforms.push_back(u);
+ }
+
+ cluster_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_store.shader, 0);
+ }
+ // The debug uniform set is only built when a color buffer is supplied; debug() fails its check otherwise.
+ if (p_color_buffer.is_valid()) {
+ Vector<RD::Uniform> uniforms;
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ u.binding = 1;
+ u.ids.push_back(cluster_buffer);
+ uniforms.push_back(u);
+ }
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+ u.binding = 2;
+ u.ids.push_back(p_color_buffer);
+ uniforms.push_back(u);
+ }
+
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+ u.binding = 3;
+ u.ids.push_back(p_depth_buffer);
+ uniforms.push_back(u);
+ }
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_SAMPLER;
+ u.binding = 4;
+ u.ids.push_back(p_depth_buffer_sampler);
+ uniforms.push_back(u);
+ }
+
+ debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_debug.shader, 0);
+ } else {
+ debug_uniform_set = RID();
+ }
+}
+
+void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y) {
+ // Cache the camera state for this pass: inverse view transform, projection kind and depth range.
+ view_xform = p_view_transform.affine_inverse();
+ orthogonal = p_cam_projection.is_orthogonal();
+ projection = p_cam_projection;
+ z_near = projection.get_z_near();
+ z_far = projection.get_z_far();
+ // Perspective cameras use a copy of the projection passed through adjust_perspective_znear(0.0001).
+ CameraMatrix near_adjusted = projection;
+ if (!orthogonal) {
+ near_adjusted.adjust_perspective_znear(0.0001);
+ }
+ // Both matrices are then pre-multiplied by the depth correction built from p_flip_y.
+ CameraMatrix depth_fix;
+ depth_fix.set_depth_correction(p_flip_y);
+ projection = depth_fix * projection;
+ adjusted_projection = depth_fix * near_adjusted;
+ render_element_count = 0; // start the pass with no queued elements of any type
+ for (uint32_t type = 0; type < ELEMENT_TYPE_MAX; type++) {
+ cluster_count_by_type[type] = 0;
+ }
+}
+
+void ClusterBuilderRD::bake_cluster() {
+ RENDER_TIMESTAMP(">Bake Cluster");
+ // Clears the cluster buffer, then (only if elements were queued) rasterizes each element's proxy mesh and runs the store compute pass.
+ //clear cluster buffer
+ RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, true);
+
+ if (render_element_count > 0) {
+ //clear render buffer
+ RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, true);
+
+ { //fill state uniform
+
+ StateUniform state;
+
+ RendererStorageRD::store_camera(adjusted_projection, state.projection);
+ state.inv_z_far = 1.0 / z_far;
+ state.screen_to_clusters_shift = get_shift_from_power_of_2(cluster_size);
+ state.screen_to_clusters_shift -= divisor; //screen is smaller, shift one less
+
+ state.cluster_screen_width = cluster_screen_size.x;
+ state.cluster_depth_offset = (render_element_max / 32);
+ state.cluster_data_size = state.cluster_depth_offset + render_element_max;
+
+ RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, true);
+ }
+
+ //update instances
+
+ RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, true);
+ // Only the render_element_count queued entries were uploaded above, not the whole array.
+ RENDER_TIMESTAMP("Render Elements");
+
+ //render elements
+ {
+ RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD);
+ ClusterBuilderSharedDataRD::ClusterRender::PushConstant push_constant = {};
+
+ RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shared->cluster_render.shader_pipelines[use_msaa ? ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_MSAA : ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_NORMAL]);
+ RD::get_singleton()->draw_list_bind_uniform_set(draw_list, cluster_render_uniform_set, 0);
+ // One draw per element: instances stays 1 because the batching code further down is compiled out.
+ for (uint32_t i = 0; i < render_element_count;) {
+ push_constant.base_index = i;
+ switch (render_elements[i].type) {
+ case ELEMENT_TYPE_OMNI_LIGHT: {
+ RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->sphere_vertex_array);
+ RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->sphere_index_array);
+ } break;
+ case ELEMENT_TYPE_SPOT_LIGHT: {
+ RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->cone_vertex_array);
+ RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->cone_index_array);
+ } break;
+ case ELEMENT_TYPE_DECAL:
+ case ELEMENT_TYPE_REFLECTION_PROBE: {
+ RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->box_vertex_array);
+ RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->box_index_array);
+ } break;
+ }
+
+ RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterRender::PushConstant));
+
+ uint32_t instances = 1;
+#if 0 // NOTE(review): disabled batching of same-type runs; it refers to element_count/elements, names not used anywhere else in this file
+ for (uint32_t j = i+1; j < element_count; j++) {
+ if (elements[i].type!=elements[j].type) {
+ break;
+ }
+ instances++;
+ }
+#endif
+ RD::get_singleton()->draw_list_draw(draw_list, true, instances);
+ i += instances;
+ }
+ RD::get_singleton()->draw_list_end();
+ }
+ //store elements
+ RENDER_TIMESTAMP("Pack Elements");
+
+ {
+ RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+ RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_store.shader_pipeline);
+ RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cluster_store_uniform_set, 0);
+ // Every field, including both pads, is assigned explicitly before the upload.
+ ClusterBuilderSharedDataRD::ClusterStore::PushConstant push_constant;
+ push_constant.cluster_render_data_size = render_element_max / 32 + render_element_max;
+ push_constant.max_render_element_count_div_32 = render_element_max / 32;
+ push_constant.cluster_screen_size[0] = cluster_screen_size.x;
+ push_constant.cluster_screen_size[1] = cluster_screen_size.y;
+ push_constant.render_element_count_div_32 = render_element_count > 0 ? (render_element_count - 1) / 32 + 1 : 0;
+ push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32;
+ push_constant.pad1 = 0;
+ push_constant.pad2 = 0;
+
+ RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant));
+
+ RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1, 8, 8, 1);
+
+ RD::get_singleton()->compute_list_end();
+ }
+ }
+ RENDER_TIMESTAMP("<Bake Cluster");
+}
+
+void ClusterBuilderRD::debug(ElementType p_element) {
+ ERR_FAIL_COND(debug_uniform_set.is_null());
+ RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+ RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_debug.shader_pipeline);
+ RD::get_singleton()->compute_list_bind_uniform_set(compute_list, debug_uniform_set, 0);
+ // Push constant: screen and cluster grid sizes, depth range, and the element type chosen by the caller.
+ ClusterBuilderSharedDataRD::ClusterDebug::PushConstant push_constant = {}; // zero-init so pad1/pad2 are never uploaded uninitialized
+ push_constant.screen_size[0] = screen_size.x;
+ push_constant.screen_size[1] = screen_size.y;
+ push_constant.cluster_screen_size[0] = cluster_screen_size.x;
+ push_constant.cluster_screen_size[1] = cluster_screen_size.y;
+ push_constant.cluster_shift = get_shift_from_power_of_2(cluster_size);
+ push_constant.cluster_type = p_element;
+ push_constant.orthogonal = orthogonal;
+ push_constant.z_far = z_far;
+ push_constant.z_near = z_near;
+ push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32;
+
+ RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant));
+
+ RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1, 8, 8, 1);
+
+ RD::get_singleton()->compute_list_end();
+}
+
+RID ClusterBuilderRD::get_cluster_buffer() const {
+ return cluster_buffer; // storage buffer created in setup(); reset to RID() by _clear()
+}
+
+uint32_t ClusterBuilderRD::get_cluster_size() const {
+ return cluster_size; // the value setup() divides the screen size by to get the cluster grid
+}
+
+uint32_t ClusterBuilderRD::get_max_cluster_elements() const {
+ return max_elements_by_type; // p_max_elements from setup(), rounded up to a multiple of 32
+}
+
+void ClusterBuilderRD::set_shared(ClusterBuilderSharedDataRD *p_shared) {
+ shared = p_shared; // only the pointer is stored; setup(), bake_cluster() and debug() dereference it without a null check
+}
+
+ClusterBuilderRD::ClusterBuilderRD() {
+ state_uniform = RD::get_singleton()->uniform_buffer_create(sizeof(StateUniform)); // freed in the destructor, not in _clear(), so it survives repeated setup() calls
+}
+
+ClusterBuilderRD::~ClusterBuilderRD() {
+ _clear(); // releases whatever setup() created; returns immediately if cluster_buffer is null
+ RD::get_singleton()->free(state_uniform);
+}
diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.h b/servers/rendering/renderer_rd/cluster_builder_rd.h
new file mode 100644
index 0000000000..dc1707b534
--- /dev/null
+++ b/servers/rendering/renderer_rd/cluster_builder_rd.h
@@ -0,0 +1,378 @@
+/*************************************************************************/
+/* cluster_builder_rd.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
+#ifndef CLUSTER_BUILDER_RD_H
+#define CLUSTER_BUILDER_RD_H
+
+#include "servers/rendering/renderer_rd/renderer_storage_rd.h"
+#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h"
+
+class ClusterBuilderSharedDataRD {
+ friend class ClusterBuilderRD;
+ // Meshes, shaders and pipelines created once by the constructor; ClusterBuilderRD (a friend) reads them directly.
+ RID sphere_vertex_buffer;
+ RID sphere_vertex_array;
+ RID sphere_index_buffer;
+ RID sphere_index_array;
+ float sphere_overfit = 0.0; // an icosphere is not a perfect sphere, so it has to be enlarged by this factor to cover the sphere area
+
+ RID cone_vertex_buffer;
+ RID cone_vertex_array;
+ RID cone_index_buffer;
+ RID cone_index_array;
+ float cone_overfit = 0.0; // a cone mesh is not a perfect cone, so it has to be enlarged by this factor to cover the actual cone area
+
+ RID box_vertex_buffer;
+ RID box_vertex_array;
+ RID box_index_buffer;
+ RID box_index_array;
+ // NOTE(review): this enum is not referenced inside this class; ClusterBuilderRD declares an identical Divisor of its own.
+ enum Divisor {
+ DIVISOR_1,
+ DIVISOR_2,
+ DIVISOR_4,
+ };
+
+ struct ClusterRender {
+ struct PushConstant {
+ uint32_t base_index; // set to the index of the element being drawn (see bake_cluster())
+ uint32_t pad0; // pad0..pad2: presumably padding to a 16-byte size - confirm against cluster_render.glsl
+ uint32_t pad1;
+ uint32_t pad2;
+ };
+
+ ClusterRenderShaderRD cluster_render_shader;
+ RID shader_version;
+ RID shader;
+ enum PipelineVersion {
+ PIPELINE_NORMAL,
+ PIPELINE_MSAA,
+ PIPELINE_MAX
+ };
+
+ RID shader_pipelines[PIPELINE_MAX];
+ } cluster_render;
+
+ struct ClusterStore {
+ struct PushConstant {
+ uint32_t cluster_render_data_size; // how much data a single cluster takes
+ uint32_t max_render_element_count_div_32; //divided by 32
+ uint32_t cluster_screen_size[2];
+ uint32_t render_element_count_div_32; //divided by 32
+ uint32_t max_cluster_element_count_div_32; //divided by 32
+ uint32_t pad1;
+ uint32_t pad2;
+ };
+
+ ClusterStoreShaderRD cluster_store_shader;
+ RID shader_version;
+ RID shader;
+ RID shader_pipeline;
+ } cluster_store;
+
+ struct ClusterDebug {
+ struct PushConstant {
+ uint32_t screen_size[2];
+ uint32_t cluster_screen_size[2];
+
+ uint32_t cluster_shift;
+ uint32_t cluster_type; // receives the ElementType passed to ClusterBuilderRD::debug()
+ float z_near;
+ float z_far;
+
+ uint32_t orthogonal;
+ uint32_t max_cluster_element_count_div_32;
+ uint32_t pad1;
+ uint32_t pad2;
+ };
+
+ ClusterDebugShaderRD cluster_debug_shader;
+ RID shader_version;
+ RID shader;
+ RID shader_pipeline;
+ } cluster_debug;
+
+public:
+ ClusterBuilderSharedDataRD();
+ ~ClusterBuilderSharedDataRD();
+};
+
+class ClusterBuilderRD {
+public:
+ enum LightType {
+ LIGHT_TYPE_OMNI,
+ LIGHT_TYPE_SPOT
+ };
+
+ enum BoxType {
+ BOX_TYPE_REFLECTION_PROBE,
+ BOX_TYPE_DECAL,
+ };
+
+ enum ElementType {
+ ELEMENT_TYPE_OMNI_LIGHT,
+ ELEMENT_TYPE_SPOT_LIGHT,
+ ELEMENT_TYPE_DECAL,
+ ELEMENT_TYPE_REFLECTION_PROBE,
+ ELEMENT_TYPE_MAX,
+
+ };
+
+private:
+ ClusterBuilderSharedDataRD *shared = nullptr;
+
+ struct RenderElementData { // one entry per element added through add_light()/add_box(); 20 4-byte fields, 80 bytes
+ uint32_t type; // ElementType value, 0 to ELEMENT_TYPE_MAX - 1 (0-3)
+ uint32_t touches_near; // boolean result of the near-plane / camera-inside test in add_light()/add_box()
+ uint32_t touches_far; // boolean: element depth range extends past z_far
+ uint32_t original_index; // index of the element among elements of the same type, in insertion order
+ float transform_inv[12]; // filled by RendererStorageRD::store_transform_transposed_3x4(); transposed 3x4 layout for less space
+ float scale[3]; // per-axis size: radius for omni lights, cone extents for spots, half extents for boxes
+ uint32_t pad; // carries no data here; presumably alignment padding -- confirm against the shader
+ };
+
+ uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {}; // elements added so far, per ElementType
+ uint32_t max_elements_by_type = 0; // per-type cap; add_light()/add_box() drop elements once it is reached
+
+ RenderElementData *render_elements = nullptr; // array that add_light()/add_box() write into
+ uint32_t render_element_count = 0; // next free slot in render_elements (all types together)
+ uint32_t render_element_max = 0; // not read by add_light()/add_box(); presumably the capacity of render_elements -- confirm
+
+ Transform view_xform; // pre-multiplied onto every element transform in add_light()/add_box()
+ CameraMatrix adjusted_projection;
+ CameraMatrix projection;
+ float z_far = 0; // compared against the element's -origin.z depth range for touches_far
+ float z_near = 0; // compared against the element's -origin.z depth range for touches_near (orthogonal case)
+ bool orthogonal = false; // selects the near-plane test used by add_light()/add_box()
+
+ enum Divisor {
+ DIVISOR_1,
+ DIVISOR_2,
+ DIVISOR_4,
+ };
+
+ uint32_t cluster_size = 32;
+ bool use_msaa = true;
+ Divisor divisor = DIVISOR_4;
+
+ Size2i screen_size;
+ Size2i cluster_screen_size;
+
+ RID framebuffer;
+ RID cluster_render_buffer; // used for creating
+ RID cluster_buffer; // used for rendering
+ RID element_buffer; // used for storing, to hint whether an element touches the far plane or near plane
+ uint32_t cluster_render_buffer_size = 0;
+ uint32_t cluster_buffer_size = 0;
+
+ RID cluster_render_uniform_set;
+ RID cluster_store_uniform_set;
+
+ //persistent data
+
+ void _clear();
+
+ struct StateUniform { // 24 4-byte fields, 96 bytes in total
+ float projection[16]; // 4x4 matrix stored as 16 floats
+ float inv_z_far;
+ uint32_t screen_to_clusters_shift; // shift to obtain coordinates in block indices
+ uint32_t cluster_screen_width; // presumably the screen width measured in clusters -- confirm against setup()
+ uint32_t cluster_data_size; // how much data a single cluster takes
+ uint32_t cluster_depth_offset;
+ uint32_t pad0; // pad0..pad2 carry no data here; presumably alignment padding -- confirm against the shader
+ uint32_t pad1;
+ uint32_t pad2;
+ };
+
+ RID state_uniform;
+
+ RID debug_uniform_set;
+
+public:
+ void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer);
+
+ void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y);
+
+ // Records an omni or spot light as a render element.
+ // p_transform is pre-multiplied by view_xform, so the stored transform is in view space.
+ // p_radius is multiplied by the uniform scale of that transform.
+ // p_spot_aperture is in degrees (it goes through Math::deg2rad) and is only read for LIGHT_TYPE_SPOT.
+ // The light is silently dropped once max_elements_by_type lights of its type have been added.
+ _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) {
+ if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) {
+ return; //max number elements reached
+ }
+ if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) {
+ return; //max number elements reached
+ }
+
+ RenderElementData &e = render_elements[render_element_count];
+
+ Transform xform = view_xform * p_transform;
+
+ float radius = xform.basis.get_uniform_scale();
+ // a scale within 2% of 1 is left in the basis; anything else is stripped here (radius carries the scale either way)
+ if (radius < 0.98 || radius > 1.02) {
+ xform.basis.orthonormalize();
+ }
+
+ radius *= p_radius;
+
+ if (p_type == LIGHT_TYPE_OMNI) {
+ radius *= shared->sphere_overfit; // overfit icosphere
+
+ //omni
+ float depth = -xform.origin.z;
+ if (orthogonal) {
+ e.touches_near = (depth - radius) < z_near;
+ } else {
+ //contains camera inside light
+ float radius2 = radius * shared->sphere_overfit; // overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex)
+ e.touches_near = xform.origin.length_squared() < radius2 * radius2;
+ }
+
+ e.touches_far = (depth + radius) > z_far;
+ e.scale[0] = radius;
+ e.scale[1] = radius;
+ e.scale[2] = radius;
+ e.type = ELEMENT_TYPE_OMNI_LIGHT;
+ e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT];
+
+ RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);
+
+ cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++;
+
+ } else {
+ //spot
+ radius *= shared->cone_overfit; // overfit icosphere
+
+ real_t len = Math::tan(Math::deg2rad(p_spot_aperture)) * radius;
+ //approximate, probably better to use a cone support function
+ float max_d = -1e20;
+ float min_d = 1e20;
+#define CONE_MINMAX(m_x, m_y) \
+ { \
+ float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \
+ min_d = MIN(d, min_d); \
+ max_d = MAX(d, max_d); \
+ }
+
+ CONE_MINMAX(1, 1);
+ CONE_MINMAX(-1, 1);
+ CONE_MINMAX(-1, -1);
+ CONE_MINMAX(1, -1);
+
+ if (orthogonal) {
+ e.touches_near = min_d < z_near;
+ } else {
+ //contains camera inside light
+ Plane base_plane(xform.origin, -xform.basis.get_axis(Vector3::AXIS_Z));
+ float dist = base_plane.distance_to(Vector3());
+ if (dist >= 0 && dist < radius) {
+ //inside, check angle
+ float angle = Math::rad2deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_axis(Vector3::AXIS_Z))));
+ e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit
+ } else {
+ e.touches_near = false;
+ }
+ }
+
+ e.touches_far = max_d > z_far;
+
+ e.scale[0] = len * shared->cone_overfit;
+ e.scale[1] = len * shared->cone_overfit;
+ e.scale[2] = radius;
+
+ e.type = ELEMENT_TYPE_SPOT_LIGHT;
+ e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; //spot lights are counted and indexed separately from omni lights
+
+ RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);
+
+ cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;
+ }
+
+ render_element_count++;
+ }
+
+ // Records a decal or reflection probe box as a render element.
+ // p_transform is pre-multiplied by view_xform; p_half_extents is scaled by the length of each basis row.
+ // The box is silently dropped once max_elements_by_type boxes of its type have been added.
+ _FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform &p_transform, const Vector3 &p_half_extents) {
+ const bool decals_full = p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type;
+ const bool probes_full = p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type;
+ if (decals_full || probes_full) {
+ return; // no room left for this element type
+ }
+
+ RenderElementData &element = render_elements[render_element_count];
+ Transform box_xform = view_xform * p_transform;
+
+ // move the scale out of the basis rows and into the extents, leaving unit-length rows
+ Vector3 extents = p_half_extents;
+ for (uint32_t axis = 0; axis < 3; axis++) {
+ float row_length = box_xform.basis.elements[axis].length();
+ extents[axis] *= row_length;
+ box_xform.basis.elements[axis] /= row_length;
+ }
+
+ float center_depth = -box_xform.origin.z;
+ float depth_reach = Math::abs(box_xform.basis.xform_inv(Vector3(0, 0, -1)).dot(extents));
+
+ if (!orthogonal) {
+ // perspective: near is touched when the camera origin lies inside the box
+ Vector3 camera_local = box_xform.xform_inv(Vector3(0, 0, 0)).abs();
+ element.touches_near = camera_local.x < extents.x && camera_local.y < extents.y && camera_local.z < extents.z;
+ } else {
+ element.touches_near = center_depth - depth_reach < z_near;
+ }
+
+ element.touches_far = center_depth + depth_reach > z_far;
+
+ element.scale[0] = extents.x;
+ element.scale[1] = extents.y;
+ element.scale[2] = extents.z;
+
+ if (p_box_type == BOX_TYPE_DECAL) {
+ element.type = ELEMENT_TYPE_DECAL;
+ } else {
+ element.type = ELEMENT_TYPE_REFLECTION_PROBE;
+ }
+
+ uint32_t &count_for_type = cluster_count_by_type[element.type];
+ element.original_index = count_for_type;
+
+ RendererStorageRD::store_transform_transposed_3x4(box_xform, element.transform_inv);
+
+ count_for_type++;
+ render_element_count++;
+ }
+
+ void bake_cluster();
+ void debug(ElementType p_element);
+
+ RID get_cluster_buffer() const;
+ uint32_t get_cluster_size() const;
+ uint32_t get_max_cluster_elements() const;
+
+ void set_shared(ClusterBuilderSharedDataRD *p_shared);
+
+ ClusterBuilderRD();
+ ~ClusterBuilderRD();
+};
+
+#endif // CLUSTER_BUILDER_H
diff --git a/servers/rendering/renderer_rd/light_cluster_builder.cpp b/servers/rendering/renderer_rd/light_cluster_builder.cpp
deleted file mode 100644
index bb807ca4ca..0000000000
--- a/servers/rendering/renderer_rd/light_cluster_builder.cpp
+++ /dev/null
@@ -1,252 +0,0 @@
-/*************************************************************************/
-/* light_cluster_builder.cpp */
-/*************************************************************************/
-/* This file is part of: */
-/* GODOT ENGINE */
-/* https://godotengine.org */
-/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
-/* */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the */
-/* "Software"), to deal in the Software without restriction, including */
-/* without limitation the rights to use, copy, modify, merge, publish, */
-/* distribute, sublicense, and/or sell copies of the Software, and to */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions: */
-/* */
-/* The above copyright notice and this permission notice shall be */
-/* included in all copies or substantial portions of the Software. */
-/* */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/*************************************************************************/
-
-#include "light_cluster_builder.h"
-
-void LightClusterBuilder::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection) {
- view_xform = p_view_transform;
- projection = p_cam_projection;
- z_near = -projection.get_z_near();
- z_far = -projection.get_z_far();
-
- //reset counts
- light_count = 0;
- refprobe_count = 0;
- decal_count = 0;
- item_count = 0;
- sort_id_count = 0;
-}
-
-void LightClusterBuilder::bake_cluster() {
- float slice_depth = (z_near - z_far) / depth;
-
- uint8_t *cluster_dataw = cluster_data.ptrw();
- Cell *cluster_data_ptr = (Cell *)cluster_dataw;
- //clear the cluster
- zeromem(cluster_data_ptr, (width * height * depth * sizeof(Cell)));
-
- /* Step 1, create cell positions and count them */
-
- for (uint32_t i = 0; i < item_count; i++) {
- const Item &item = items[i];
-
- int from_slice = Math::floor((z_near - (item.aabb.position.z + item.aabb.size.z)) / slice_depth);
- int to_slice = Math::floor((z_near - item.aabb.position.z) / slice_depth);
-
- if (from_slice >= (int)depth || to_slice < 0) {
- continue; //sorry no go
- }
-
- from_slice = MAX(0, from_slice);
- to_slice = MIN((int)depth - 1, to_slice);
-
- for (int j = from_slice; j <= to_slice; j++) {
- Vector3 min = item.aabb.position;
- Vector3 max = item.aabb.position + item.aabb.size;
-
- float limit_near = MIN((z_near - slice_depth * j), max.z);
- float limit_far = MAX((z_near - slice_depth * (j + 1)), min.z);
-
- max.z = limit_near;
- min.z = limit_near;
-
- Vector3 proj_min = projection.xform(min);
- Vector3 proj_max = projection.xform(max);
-
- int near_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width));
- int near_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height));
- int near_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width));
- int near_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height));
-
- max.z = limit_far;
- min.z = limit_far;
-
- proj_min = projection.xform(min);
- proj_max = projection.xform(max);
-
- int far_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width));
- int far_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height));
- int far_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width));
- int far_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height));
-
- //print_line(itos(j) + " near - " + Vector2i(near_from_x, near_from_y) + " -> " + Vector2i(near_to_x, near_to_y));
- //print_line(itos(j) + " far - " + Vector2i(far_from_x, far_from_y) + " -> " + Vector2i(far_to_x, far_to_y));
-
- int from_x = MIN(near_from_x, far_from_x);
- int from_y = MIN(near_from_y, far_from_y);
- int to_x = MAX(near_to_x, far_to_x);
- int to_y = MAX(near_to_y, far_to_y);
-
- if (from_x >= (int)width || to_x < 0 || from_y >= (int)height || to_y < 0) {
- continue;
- }
-
- int sx = MAX(0, from_x);
- int sy = MAX(0, from_y);
- int dx = MIN((int)width - 1, to_x);
- int dy = MIN((int)height - 1, to_y);
-
- //print_line(itos(j) + " - " + Vector2i(sx, sy) + " -> " + Vector2i(dx, dy));
-
- for (int x = sx; x <= dx; x++) {
- for (int y = sy; y <= dy; y++) {
- uint32_t offset = j * (width * height) + y * width + x;
-
- if (unlikely(sort_id_count == sort_id_max)) {
- sort_id_max = nearest_power_of_2_templated(sort_id_max + 1);
- sort_ids = (SortID *)memrealloc(sort_ids, sizeof(SortID) * sort_id_max);
- if (ids.size()) {
- ids.resize(sort_id_max);
- RD::get_singleton()->free(items_buffer);
- items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * sort_id_max);
- }
- }
-
- sort_ids[sort_id_count].cell_index = offset;
- sort_ids[sort_id_count].item_index = item.index;
- sort_ids[sort_id_count].item_type = item.type;
-
- sort_id_count++;
-
- //for now, only count
- cluster_data_ptr[offset].item_pointers[item.type]++;
- //print_line("at offset " + itos(offset) + " value: " + itos(cluster_data_ptr[offset].item_pointers[item.type]));
- }
- }
- }
- }
-
- /* Step 2, Assign pointers (and reset counters) */
-
- uint32_t offset = 0;
- for (uint32_t i = 0; i < (width * height * depth); i++) {
- for (int j = 0; j < ITEM_TYPE_MAX; j++) {
- uint32_t count = cluster_data_ptr[i].item_pointers[j]; //save count
- cluster_data_ptr[i].item_pointers[j] = offset; //replace count by pointer
- offset += count; //increase offset by count;
- }
- }
-
- //print_line("offset: " + itos(offset));
- /* Step 3, Place item lists */
-
- uint32_t *ids_ptr = ids.ptrw();
-
- for (uint32_t i = 0; i < sort_id_count; i++) {
- const SortID &id = sort_ids[i];
- Cell &cell = cluster_data_ptr[id.cell_index];
- uint32_t pointer = cell.item_pointers[id.item_type] & POINTER_MASK;
- uint32_t counter = cell.item_pointers[id.item_type] >> COUNTER_SHIFT;
- ids_ptr[pointer + counter] = id.item_index;
-
- cell.item_pointers[id.item_type] = pointer | ((counter + 1) << COUNTER_SHIFT);
- }
-
- RD::get_singleton()->texture_update(cluster_texture, 0, cluster_data, true);
- RD::get_singleton()->buffer_update(items_buffer, 0, offset * sizeof(uint32_t), ids_ptr, true);
-}
-
-void LightClusterBuilder::setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth) {
- if (width == p_width && height == p_height && depth == p_depth) {
- return;
- }
- if (cluster_texture.is_valid()) {
- RD::get_singleton()->free(cluster_texture);
- }
-
- width = p_width;
- height = p_height;
- depth = p_depth;
-
- cluster_data.resize(width * height * depth * sizeof(Cell));
-
- {
- RD::TextureFormat tf;
- tf.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
- tf.texture_type = RD::TEXTURE_TYPE_3D;
- tf.width = width;
- tf.height = height;
- tf.depth = depth;
- tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
-
- cluster_texture = RD::get_singleton()->texture_create(tf, RD::TextureView());
- }
-}
-
-RID LightClusterBuilder::get_cluster_texture() const {
- return cluster_texture;
-}
-
-RID LightClusterBuilder::get_cluster_indices_buffer() const {
- return items_buffer;
-}
-
-LightClusterBuilder::LightClusterBuilder() {
- //initialize accumulators to something
- lights = (LightData *)memalloc(sizeof(LightData) * 1024);
- light_max = 1024;
-
- refprobes = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * 1024);
- refprobe_max = 1024;
-
- decals = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * 1024);
- decal_max = 1024;
-
- items = (Item *)memalloc(sizeof(Item) * 1024);
- item_max = 1024;
-
- sort_ids = (SortID *)memalloc(sizeof(SortID) * 1024);
- ids.resize(2014);
- items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 1024);
- item_max = 1024;
-}
-
-LightClusterBuilder::~LightClusterBuilder() {
- if (cluster_data.size()) {
- RD::get_singleton()->free(cluster_texture);
- }
-
- if (lights) {
- memfree(lights);
- }
- if (refprobes) {
- memfree(refprobes);
- }
- if (decals) {
- memfree(decals);
- }
- if (items) {
- memfree(items);
- }
- if (sort_ids) {
- memfree(sort_ids);
- RD::get_singleton()->free(items_buffer);
- }
-}
diff --git a/servers/rendering/renderer_rd/light_cluster_builder.h b/servers/rendering/renderer_rd/light_cluster_builder.h
deleted file mode 100644
index 8f77ece6f5..0000000000
--- a/servers/rendering/renderer_rd/light_cluster_builder.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*************************************************************************/
-/* light_cluster_builder.h */
-/*************************************************************************/
-/* This file is part of: */
-/* GODOT ENGINE */
-/* https://godotengine.org */
-/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
-/* */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the */
-/* "Software"), to deal in the Software without restriction, including */
-/* without limitation the rights to use, copy, modify, merge, publish, */
-/* distribute, sublicense, and/or sell copies of the Software, and to */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions: */
-/* */
-/* The above copyright notice and this permission notice shall be */
-/* included in all copies or substantial portions of the Software. */
-/* */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/*************************************************************************/
-
-#ifndef LIGHT_CLUSTER_BUILDER_H
-#define LIGHT_CLUSTER_BUILDER_H
-
-#include "servers/rendering/renderer_rd/renderer_storage_rd.h"
-
-class LightClusterBuilder {
-public:
- enum LightType {
- LIGHT_TYPE_OMNI,
- LIGHT_TYPE_SPOT
- };
-
- enum ItemType {
- ITEM_TYPE_OMNI_LIGHT,
- ITEM_TYPE_SPOT_LIGHT,
- ITEM_TYPE_REFLECTION_PROBE,
- ITEM_TYPE_DECAL,
- ITEM_TYPE_MAX //should always be 4
- };
-
- enum {
- COUNTER_SHIFT = 20, //one million total ids
- POINTER_MASK = (1 << COUNTER_SHIFT) - 1,
- COUNTER_MASK = 0xfff // 4096 items per cell
- };
-
-private:
- struct LightData {
- float position[3];
- uint32_t type;
- float radius;
- float spot_aperture;
- uint32_t pad[2];
- };
-
- uint32_t light_count = 0;
- uint32_t light_max = 0;
- LightData *lights = nullptr;
-
- struct OrientedBoxData {
- float position[3];
- uint32_t pad;
- float x_axis[3];
- uint32_t pad2;
- float y_axis[3];
- uint32_t pad3;
- float z_axis[3];
- uint32_t pad4;
- };
-
- uint32_t refprobe_count = 0;
- uint32_t refprobe_max = 0;
- OrientedBoxData *refprobes = nullptr;
-
- uint32_t decal_count = 0;
- uint32_t decal_max = 0;
- OrientedBoxData *decals = nullptr;
-
- struct Item {
- AABB aabb;
- ItemType type;
- uint32_t index;
- };
-
- Item *items = nullptr;
- uint32_t item_count = 0;
- uint32_t item_max = 0;
-
- uint32_t width = 0;
- uint32_t height = 0;
- uint32_t depth = 0;
-
- struct Cell {
- uint32_t item_pointers[ITEM_TYPE_MAX];
- };
-
- Vector<uint8_t> cluster_data;
- RID cluster_texture;
-
- struct SortID {
- uint32_t cell_index;
- uint32_t item_index;
- ItemType item_type;
- };
-
- SortID *sort_ids = nullptr;
- Vector<uint32_t> ids;
- uint32_t sort_id_count = 0;
- uint32_t sort_id_max = 0;
- RID items_buffer;
-
- Transform view_xform;
- CameraMatrix projection;
- float z_far = 0;
- float z_near = 0;
-
- _FORCE_INLINE_ void _add_item(const AABB &p_aabb, ItemType p_type, uint32_t p_index) {
- if (unlikely(item_count == item_max)) {
- item_max = nearest_power_of_2_templated(item_max + 1);
- items = (Item *)memrealloc(items, sizeof(Item) * item_max);
- }
-
- Item &item = items[item_count];
- item.aabb = p_aabb;
- item.index = p_index;
- item.type = p_type;
- item_count++;
- }
-
-public:
- void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection);
-
- _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) {
- if (unlikely(light_count == light_max)) {
- light_max = nearest_power_of_2_templated(light_max + 1);
- lights = (LightData *)memrealloc(lights, sizeof(LightData) * light_max);
- }
-
- LightData &ld = lights[light_count];
- ld.type = p_type;
- ld.position[0] = p_transform.origin.x;
- ld.position[1] = p_transform.origin.y;
- ld.position[2] = p_transform.origin.z;
- ld.radius = p_radius;
- ld.spot_aperture = p_spot_aperture;
-
- Transform xform = view_xform * p_transform;
-
- ld.radius *= xform.basis.get_uniform_scale();
-
- AABB aabb;
-
- switch (p_type) {
- case LIGHT_TYPE_OMNI: {
- aabb.position = xform.origin;
- aabb.size = Vector3(ld.radius, ld.radius, ld.radius);
- aabb.position -= aabb.size;
- aabb.size *= 2.0;
-
- _add_item(aabb, ITEM_TYPE_OMNI_LIGHT, light_count);
- } break;
- case LIGHT_TYPE_SPOT: {
- float r = ld.radius;
- real_t len = Math::tan(Math::deg2rad(ld.spot_aperture)) * r;
-
- aabb.position = xform.origin;
- aabb.expand_to(xform.xform(Vector3(len, len, -r)));
- aabb.expand_to(xform.xform(Vector3(-len, len, -r)));
- aabb.expand_to(xform.xform(Vector3(-len, -len, -r)));
- aabb.expand_to(xform.xform(Vector3(len, -len, -r)));
- _add_item(aabb, ITEM_TYPE_SPOT_LIGHT, light_count);
- } break;
- }
-
- light_count++;
- }
-
- _FORCE_INLINE_ void add_reflection_probe(const Transform &p_transform, const Vector3 &p_half_extents) {
- if (unlikely(refprobe_count == refprobe_max)) {
- refprobe_max = nearest_power_of_2_templated(refprobe_max + 1);
- refprobes = (OrientedBoxData *)memrealloc(refprobes, sizeof(OrientedBoxData) * refprobe_max);
- }
-
- Transform xform = view_xform * p_transform;
-
- OrientedBoxData &rp = refprobes[refprobe_count];
- Vector3 origin = xform.origin;
- rp.position[0] = origin.x;
- rp.position[1] = origin.y;
- rp.position[2] = origin.z;
-
- Vector3 x_axis = xform.basis.get_axis(0) * p_half_extents.x;
- rp.x_axis[0] = x_axis.x;
- rp.x_axis[1] = x_axis.y;
- rp.x_axis[2] = x_axis.z;
-
- Vector3 y_axis = xform.basis.get_axis(1) * p_half_extents.y;
- rp.y_axis[0] = y_axis.x;
- rp.y_axis[1] = y_axis.y;
- rp.y_axis[2] = y_axis.z;
-
- Vector3 z_axis = xform.basis.get_axis(2) * p_half_extents.z;
- rp.z_axis[0] = z_axis.x;
- rp.z_axis[1] = z_axis.y;
- rp.z_axis[2] = z_axis.z;
-
- AABB aabb;
-
- aabb.position = origin + x_axis + y_axis + z_axis;
- aabb.expand_to(origin + x_axis + y_axis - z_axis);
- aabb.expand_to(origin + x_axis - y_axis + z_axis);
- aabb.expand_to(origin + x_axis - y_axis - z_axis);
- aabb.expand_to(origin - x_axis + y_axis + z_axis);
- aabb.expand_to(origin - x_axis + y_axis - z_axis);
- aabb.expand_to(origin - x_axis - y_axis + z_axis);
- aabb.expand_to(origin - x_axis - y_axis - z_axis);
-
- _add_item(aabb, ITEM_TYPE_REFLECTION_PROBE, refprobe_count);
-
- refprobe_count++;
- }
-
- _FORCE_INLINE_ void add_decal(const Transform &p_transform, const Vector3 &p_half_extents) {
- if (unlikely(decal_count == decal_max)) {
- decal_max = nearest_power_of_2_templated(decal_max + 1);
- decals = (OrientedBoxData *)memrealloc(decals, sizeof(OrientedBoxData) * decal_max);
- }
-
- Transform xform = view_xform * p_transform;
-
- OrientedBoxData &dc = decals[decal_count];
-
- Vector3 origin = xform.origin;
- dc.position[0] = origin.x;
- dc.position[1] = origin.y;
- dc.position[2] = origin.z;
-
- Vector3 x_axis = xform.basis.get_axis(0) * p_half_extents.x;
- dc.x_axis[0] = x_axis.x;
- dc.x_axis[1] = x_axis.y;
- dc.x_axis[2] = x_axis.z;
-
- Vector3 y_axis = xform.basis.get_axis(1) * p_half_extents.y;
- dc.y_axis[0] = y_axis.x;
- dc.y_axis[1] = y_axis.y;
- dc.y_axis[2] = y_axis.z;
-
- Vector3 z_axis = xform.basis.get_axis(2) * p_half_extents.z;
- dc.z_axis[0] = z_axis.x;
- dc.z_axis[1] = z_axis.y;
- dc.z_axis[2] = z_axis.z;
-
- AABB aabb;
-
- aabb.position = origin + x_axis + y_axis + z_axis;
- aabb.expand_to(origin + x_axis + y_axis - z_axis);
- aabb.expand_to(origin + x_axis - y_axis + z_axis);
- aabb.expand_to(origin + x_axis - y_axis - z_axis);
- aabb.expand_to(origin - x_axis + y_axis + z_axis);
- aabb.expand_to(origin - x_axis + y_axis - z_axis);
- aabb.expand_to(origin - x_axis - y_axis + z_axis);
- aabb.expand_to(origin - x_axis - y_axis - z_axis);
-
- _add_item(aabb, ITEM_TYPE_DECAL, decal_count);
-
- decal_count++;
- }
-
- void bake_cluster();
-
- void setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth);
-
- RID get_cluster_texture() const;
- RID get_cluster_indices_buffer() const;
-
- LightClusterBuilder();
- ~LightClusterBuilder();
-};
-
-#endif // LIGHT_CLUSTER_BUILDER_H
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp
index 74556f8105..f3b09399f9 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp
+++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp
@@ -1071,7 +1071,7 @@ void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters
}
}
-void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) {
+void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) {
//CameraMatrix projection = p_cam_projection;
//projection.flip_y(); // Vulkan and modern APIs use Y-Down
CameraMatrix correction;
@@ -1099,8 +1099,18 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren
scene_state.ubo.penumbra_shadow_samples = penumbra_shadow_samples_get();
scene_state.ubo.soft_shadow_samples = soft_shadow_samples_get();
- scene_state.ubo.screen_pixel_size[0] = p_screen_pixel_size.x;
- scene_state.ubo.screen_pixel_size[1] = p_screen_pixel_size.y;
+ Size2 screen_pixel_size = Vector2(1.0, 1.0) / Size2(p_screen_size);
+ scene_state.ubo.screen_pixel_size[0] = screen_pixel_size.x;
+ scene_state.ubo.screen_pixel_size[1] = screen_pixel_size.y;
+
+ scene_state.ubo.cluster_shift = get_shift_from_power_of_2(p_cluster_size);
+ scene_state.ubo.max_cluster_element_count_div_32 = p_max_cluster_elements / 32;
+ {
+ uint32_t cluster_screen_width = (p_screen_size.width - 1) / p_cluster_size + 1;
+ uint32_t cluster_screen_height = (p_screen_size.height - 1) / p_cluster_size + 1;
+ scene_state.ubo.cluster_type_size = cluster_screen_width * cluster_screen_height * (scene_state.ubo.max_cluster_element_count_div_32 + 32);
+ scene_state.ubo.cluster_width = cluster_screen_width;
+ }
if (p_shadow_atlas.is_valid()) {
Vector2 sas = shadow_atlas_get_size(p_shadow_atlas);
@@ -1489,7 +1499,7 @@ void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<RID> &p_light
}
}
-void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) {
+void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) {
RenderBufferDataForward *render_buffer = nullptr;
if (p_render_buffer.is_valid()) {
render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer);
@@ -1522,7 +1532,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
scene_state.ubo.viewport_size[1] = vp_he.y;
scene_state.ubo.directional_light_count = p_directional_light_count;
- Size2 screen_pixel_size;
Size2i screen_size;
RID opaque_framebuffer;
RID opaque_specular_framebuffer;
@@ -1537,8 +1546,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
bool using_giprobe = false;
if (render_buffer) {
- screen_pixel_size.width = 1.0 / render_buffer->width;
- screen_pixel_size.height = 1.0 / render_buffer->height;
screen_size.x = render_buffer->width;
screen_size.y = render_buffer->height;
@@ -1595,8 +1602,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
alpha_framebuffer = opaque_framebuffer;
} else if (p_reflection_probe.is_valid()) {
uint32_t resolution = reflection_probe_instance_get_resolution(p_reflection_probe);
- screen_pixel_size.width = 1.0 / resolution;
- screen_pixel_size.height = 1.0 / resolution;
screen_size.x = resolution;
screen_size.y = resolution;
@@ -1613,7 +1618,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
_setup_lightmaps(p_lightmaps, p_cam_transform);
_setup_giprobes(p_gi_probes);
- _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
+ _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
_update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example)
@@ -1703,7 +1708,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION;
bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES;
-
bool depth_pre_pass = !low_end && depth_framebuffer.is_valid();
bool using_ssao = depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment);
@@ -1711,7 +1715,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
if (depth_pre_pass) { //depth pre pass
RENDER_TIMESTAMP("Render Depth Pre-Pass");
- RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
+ RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
bool finish_depth = using_ssao || using_sdfgi || using_giprobe;
RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
@@ -1738,11 +1742,11 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
_process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->ambient_buffer, render_buffer->reflection_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probes);
}
- _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid());
+ _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid());
RENDER_TIMESTAMP("Render Opaque Pass");
- RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_gi_probes, p_lightmaps);
+ RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps);
bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss;
bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss;
@@ -1844,7 +1848,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
RENDER_TIMESTAMP("Render Transparent Pass");
- _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
+ _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
render_list.sort_by_reverse_depth_and_priority(true);
@@ -1867,7 +1871,7 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr
scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1;
- _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_zfar, false, p_use_pancake);
+ _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_zfar, false, p_use_pancake);
if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) {
p_screen_lod_threshold = 0.0;
@@ -1877,7 +1881,7 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr
_fill_render_list(p_instances, pass_mode, p_projection, p_transform);
- RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
+ RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Shadow");
@@ -1899,13 +1903,13 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb,
scene_state.ubo.dual_paraboloid_side = 0;
- _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false);
+ _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false);
PassMode pass_mode = PASS_MODE_SHADOW;
_fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform);
- RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
+ RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Collider Heightield");
@@ -1928,12 +1932,12 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo
scene_state.ubo.dual_paraboloid_side = 0;
scene_state.ubo.material_uv2_mode = true;
- _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);
+ _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0);
PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL;
_fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform);
- RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
+ RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Material");
@@ -1964,12 +1968,12 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *
scene_state.ubo.dual_paraboloid_side = 0;
scene_state.ubo.material_uv2_mode = true;
- _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);
+ _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0);
PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL;
_fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform());
- RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
+ RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Material");
@@ -2079,7 +2083,7 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto
RendererStorageRD::store_transform(to_bounds.affine_inverse() * cam_xform, scene_state.ubo.sdf_to_bounds);
- _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);
+ _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0);
Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size);
if (!E) {
@@ -2150,20 +2154,27 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
RD::Uniform u;
u.binding = 5;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
- u.ids.push_back(get_positional_light_buffer());
+ u.ids.push_back(get_omni_light_buffer());
uniforms.push_back(u);
}
-
{
RD::Uniform u;
u.binding = 6;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
- u.ids.push_back(get_reflection_probe_buffer());
+ u.ids.push_back(get_spot_light_buffer());
uniforms.push_back(u);
}
+
{
RD::Uniform u;
u.binding = 7;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ u.ids.push_back(get_reflection_probe_buffer());
+ uniforms.push_back(u);
+ }
+ {
+ RD::Uniform u;
+ u.binding = 8;
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
u.ids.push_back(get_directional_light_buffer());
uniforms.push_back(u);
@@ -2210,21 +2221,6 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
RD::Uniform u;
u.binding = 15;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
- u.ids.push_back(get_cluster_builder_texture());
- uniforms.push_back(u);
- }
- {
- RD::Uniform u;
- u.binding = 16;
- u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
- u.ids.push_back(get_cluster_builder_indices_buffer());
- uniforms.push_back(u);
- }
-
- {
- RD::Uniform u;
- u.binding = 17;
- u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
if (directional_shadow_get_texture().is_valid()) {
u.ids.push_back(directional_shadow_get_texture());
} else {
@@ -2236,7 +2232,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
- u.binding = 18;
+ u.binding = 16;
u.ids.push_back(storage->global_variables_get_storage_buffer());
uniforms.push_back(u);
}
@@ -2244,7 +2240,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
if (!low_end) {
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
- u.binding = 19;
+ u.binding = 17;
u.ids.push_back(sdfgi_get_ubo());
uniforms.push_back(u);
}
@@ -2253,7 +2249,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
}
}
-RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps) {
+RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps) {
if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) {
RD::get_singleton()->free(render_pass_uniform_set);
}
@@ -2351,6 +2347,15 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
u.binding = 5;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ RID cb = p_cluster_buffer.is_valid() ? p_cluster_buffer : default_vec4_xform_buffer;
+ u.ids.push_back(cb);
+ uniforms.push_back(u);
+ }
+
+ {
+ RD::Uniform u;
+ u.binding = 6;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE);
u.ids.push_back(texture);
@@ -2358,17 +2363,18 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
- u.binding = 6;
+ u.binding = 7;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID bbt = rb ? render_buffers_get_back_buffer_texture(p_render_buffers) : RID();
RID texture = bbt.is_valid() ? bbt : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
u.ids.push_back(texture);
uniforms.push_back(u);
}
+
if (!low_end) {
{
RD::Uniform u;
- u.binding = 7;
+ u.binding = 8;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = rb && rb->normal_roughness_buffer.is_valid() ? rb->normal_roughness_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_NORMAL);
u.ids.push_back(texture);
@@ -2377,7 +2383,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
- u.binding = 8;
+ u.binding = 9;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID aot = rb ? render_buffers_get_ao_texture(p_render_buffers) : RID();
RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
@@ -2387,7 +2393,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
- u.binding = 9;
+ u.binding = 10;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = rb && rb->ambient_buffer.is_valid() ? rb->ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
u.ids.push_back(texture);
@@ -2396,7 +2402,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
- u.binding = 10;
+ u.binding = 11;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = rb && rb->reflection_buffer.is_valid() ? rb->reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
u.ids.push_back(texture);
@@ -2404,7 +2410,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
- u.binding = 11;
+ u.binding = 12;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID t;
if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) {
@@ -2417,7 +2423,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
- u.binding = 12;
+ u.binding = 13;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) {
u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers));
@@ -2428,14 +2434,14 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
- u.binding = 13;
+ u.binding = 14;
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer());
uniforms.push_back(u);
}
{
RD::Uniform u;
- u.binding = 14;
+ u.binding = 15;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID vfog = RID();
if (rb && render_buffers_has_volumetric_fog(p_render_buffers)) {
@@ -2519,33 +2525,43 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed
uniforms.push_back(u);
}
+
+ {
+ RD::Uniform u;
+ u.binding = 5;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+ RID cb = default_vec4_xform_buffer;
+ u.ids.push_back(cb);
+ uniforms.push_back(u);
+ }
+
// actual sdfgi stuff
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
- u.binding = 5;
+ u.binding = 6;
u.ids.push_back(p_albedo_texture);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
- u.binding = 6;
+ u.binding = 7;
u.ids.push_back(p_emission_texture);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
- u.binding = 7;
+ u.binding = 8;
u.ids.push_back(p_emission_aniso_texture);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
- u.binding = 8;
+ u.binding = 9;
u.ids.push_back(p_geom_facing_texture);
uniforms.push_back(u);
}
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h
index 3b5a5ad96f..d4a4c9a3a9 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_forward.h
+++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h
@@ -263,7 +263,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
void _update_render_base_uniform_set();
RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture);
- RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps);
+ RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps);
struct LightmapData {
float normal_xform[12];
@@ -300,6 +300,11 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
float viewport_size[2];
float screen_pixel_size[2];
+ uint32_t cluster_shift;
+ uint32_t cluster_width;
+ uint32_t cluster_type_size;
+ uint32_t max_cluster_element_count_div_32;
+
float directional_penumbra_shadow_kernel[128]; //32 vec4s
float directional_soft_shadow_kernel[128];
float penumbra_shadow_kernel[128];
@@ -421,7 +426,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
PASS_MODE_SDF,
};
- void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);
+ void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);
void _setup_giprobes(const PagedArray<RID> &p_giprobes);
void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform);
@@ -701,7 +706,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
RenderList render_list;
protected:
- virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold);
+ virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold);
virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0);
virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region);
virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region);
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
index 3061511c6d..2e457c2ce6 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
@@ -1514,7 +1514,9 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough
push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1];
push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size());
push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH;
- push_constant.use_sdfgi = rb->sdfgi != nullptr;
+
+ bool use_sdfgi = rb->sdfgi != nullptr;
+ bool use_giprobes = push_constant.max_giprobes > 0;
if (env) {
push_constant.ao_color[0] = env->ao_color.r;
@@ -1765,8 +1767,9 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough
rb->gi_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi.shader.version_get_shader(gi.shader_version, 0), 0);
}
+ GI::Mode mode = (use_sdfgi && use_giprobes) ? GI::MODE_COMBINED : (use_sdfgi ? GI::MODE_SDFGI : GI::MODE_GIPROBE);
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
- RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[0]);
+ RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[mode]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi_uniform_set, 0);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GI::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1);
@@ -3233,6 +3236,10 @@ RID RendererSceneRenderRD::reflection_atlas_create() {
ra.count = GLOBAL_GET("rendering/quality/reflection_atlas/reflection_count");
ra.size = GLOBAL_GET("rendering/quality/reflection_atlas/reflection_size");
+ ra.cluster_builder = memnew(ClusterBuilderRD);
+ ra.cluster_builder->set_shared(&cluster_builder_shared);
+ ra.cluster_builder->setup(Size2i(ra.size, ra.size), max_cluster_elements, RID(), RID(), RID());
+
return reflection_atlas_owner.make_rid(ra);
}
@@ -3244,6 +3251,8 @@ void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_ref
return; //no changes
}
+ ra->cluster_builder->setup(Size2i(ra->size, ra->size), max_cluster_elements, RID(), RID(), RID());
+
ra->size = p_reflection_size;
ra->count = p_reflection_count;
@@ -3253,7 +3262,6 @@ void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_ref
ra->reflection = RID();
RD::get_singleton()->free(ra->depth_buffer);
ra->depth_buffer = RID();
-
for (int i = 0; i < ra->reflections.size(); i++) {
_clear_reflection_data(ra->reflections.write[i].data);
if (ra->reflections[i].owner.is_null()) {
@@ -5884,6 +5892,11 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p
rb->msaa = p_msaa;
rb->screen_space_aa = p_screen_space_aa;
rb->use_debanding = p_use_debanding;
+ if (rb->cluster_builder == nullptr) {
+ rb->cluster_builder = memnew(ClusterBuilderRD);
+ }
+ rb->cluster_builder->set_shared(&cluster_builder_shared);
+
_free_render_buffer_data(rb);
{
@@ -5924,6 +5937,8 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p
rb->data->configure(rb->texture, rb->depth_texture, p_width, p_height, p_msaa);
_render_buffers_uniform_set_changed(p_render_buffers);
+
+ rb->cluster_builder->setup(Size2i(p_width, p_height), max_cluster_elements, rb->depth_texture, storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED), rb->texture);
}
void RendererSceneRenderRD::sub_surface_scattering_set_quality(RS::SubSurfaceScatteringQuality p_quality) {
@@ -6034,17 +6049,34 @@ RendererSceneRenderRD::RenderBufferData *RendererSceneRenderRD::render_buffers_g
}
void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment) {
+ cluster.reflection_count = 0;
+
for (uint32_t i = 0; i < (uint32_t)p_reflections.size(); i++) {
- RID rpi = p_reflections[i];
+ if (cluster.reflection_count == cluster.max_reflections) {
+ break;
+ }
- if (i >= cluster.max_reflections) {
- reflection_probe_instance_set_render_index(rpi, 0); //invalid, but something needs to be set
+ ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflections[i]);
+ if (!rpi) {
continue;
}
- reflection_probe_instance_set_render_index(rpi, i);
+ cluster.reflection_sort[cluster.reflection_count].instance = rpi;
+ cluster.reflection_sort[cluster.reflection_count].depth = -p_camera_inverse_transform.xform(rpi->transform.origin).z;
+ cluster.reflection_count++;
+ }
+
+ if (cluster.reflection_count > 0) {
+ SortArray<Cluster::InstanceSort<ReflectionProbeInstance>> sort_array;
+ sort_array.sort(cluster.reflection_sort, cluster.reflection_count);
+ }
+
+ for (uint32_t i = 0; i < cluster.reflection_count; i++) {
+ ReflectionProbeInstance *rpi = cluster.reflection_sort[i].instance;
+
+ rpi->render_index = i;
- RID base_probe = reflection_probe_instance_get_probe(rpi);
+ RID base_probe = rpi->probe;
Cluster::ReflectionData &reflection_ubo = cluster.reflections[i];
@@ -6053,7 +6085,7 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti
reflection_ubo.box_extents[0] = extents.x;
reflection_ubo.box_extents[1] = extents.y;
reflection_ubo.box_extents[2] = extents.z;
- reflection_ubo.index = reflection_probe_instance_get_atlas_index(rpi);
+ reflection_ubo.index = rpi->atlas_index;
Vector3 origin_offset = storage->reflection_probe_get_origin_offset(base_probe);
@@ -6074,29 +6106,38 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti
reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy;
reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy;
- Transform transform = reflection_probe_instance_get_transform(rpi);
+ Transform transform = rpi->transform;
Transform proj = (p_camera_inverse_transform * transform).inverse();
RendererStorageRD::store_transform(proj, reflection_ubo.local_matrix);
- cluster.builder.add_reflection_probe(transform, extents);
+ current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_REFLECTION_PROBE, transform, extents);
- reflection_probe_instance_set_render_pass(rpi, RSG::rasterizer->get_frame_number());
+ rpi->last_pass = RSG::rasterizer->get_frame_number();
}
- if (p_reflections.size()) {
- RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, MIN(cluster.max_reflections, (unsigned int)p_reflections.size()) * sizeof(ReflectionData), cluster.reflections, true);
+ if (cluster.reflection_count) {
+ RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections, true);
}
}
-void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) {
- uint32_t light_count = 0;
+void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) {
+ Transform inverse_transform = p_camera_transform.affine_inverse();
+
r_directional_light_count = 0;
r_positional_light_count = 0;
sky_scene_state.ubo.directional_light_count = 0;
+ Plane camera_plane(p_camera_transform.origin, -p_camera_transform.basis.get_axis(Vector3::AXIS_Z).normalized());
+
+ cluster.omni_light_count = 0;
+ cluster.spot_light_count = 0;
+
for (int i = 0; i < (int)p_lights.size(); i++) {
- RID li = p_lights[i];
- RID base = light_instance_get_base_light(li);
+ LightInstance *li = light_instance_owner.getornull(p_lights[i]);
+ if (!li) {
+ continue;
+ }
+ RID base = li->light;
ERR_CONTINUE(base.is_null());
@@ -6106,7 +6147,7 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
// Copy to SkyDirectionalLightData
if (r_directional_light_count < sky_scene_state.max_directional_lights) {
SkyDirectionalLightData &sky_light_data = sky_scene_state.directional_lights[r_directional_light_count];
- Transform light_transform = light_instance_get_base_transform(li);
+ Transform light_transform = li->transform;
Vector3 world_direction = light_transform.basis.xform(Vector3(0, 0, 1)).normalized();
sky_light_data.direction[0] = world_direction.x;
@@ -6142,9 +6183,9 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
Cluster::DirectionalLightData &light_data = cluster.directional_lights[r_directional_light_count];
- Transform light_transform = light_instance_get_base_transform(li);
+ Transform light_transform = li->transform;
- Vector3 direction = p_camera_inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, 1))).normalized();
+ Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, 1))).normalized();
light_data.direction[0] = direction.x;
light_data.direction[1] = direction.y;
@@ -6223,28 +6264,28 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
int limit = smode == RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL ? 0 : (smode == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS ? 1 : 3);
light_data.blend_splits = storage->light_directional_get_blend_splits(base);
for (int j = 0; j < 4; j++) {
- Rect2 atlas_rect = light_instance_get_directional_shadow_atlas_rect(li, j);
- CameraMatrix matrix = light_instance_get_shadow_camera(li, j);
- float split = light_instance_get_directional_shadow_split(li, MIN(limit, j));
+ Rect2 atlas_rect = li->shadow_transform[j].atlas_rect;
+ CameraMatrix matrix = li->shadow_transform[j].camera;
+ float split = li->shadow_transform[MIN(limit, j)].split;
CameraMatrix bias;
bias.set_light_bias();
CameraMatrix rectm;
rectm.set_light_atlas_rect(atlas_rect);
- Transform modelview = (p_camera_inverse_transform * light_instance_get_shadow_transform(li, j)).inverse();
+ Transform modelview = (inverse_transform * li->shadow_transform[j].transform).inverse();
CameraMatrix shadow_mtx = rectm * bias * matrix * modelview;
light_data.shadow_split_offsets[j] = split;
- float bias_scale = light_instance_get_shadow_bias_scale(li, j);
+ float bias_scale = li->shadow_transform[j].bias_scale;
light_data.shadow_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * bias_scale;
- light_data.shadow_normal_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * light_instance_get_directional_shadow_texel_size(li, j);
+ light_data.shadow_normal_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * li->shadow_transform[j].shadow_texel_size;
light_data.shadow_transmittance_bias[j] = storage->light_get_transmittance_bias(base) * bias_scale;
- light_data.shadow_z_range[j] = light_instance_get_shadow_range(li, j);
- light_data.shadow_range_begin[j] = light_instance_get_shadow_range_begin(li, j);
+ light_data.shadow_z_range[j] = li->shadow_transform[j].farplane;
+ light_data.shadow_range_begin[j] = li->shadow_transform[j].range_begin;
RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrices[j]);
- Vector2 uv_scale = light_instance_get_shadow_uv_scale(li, j);
+ Vector2 uv_scale = li->shadow_transform[j].uv_scale;
uv_scale *= atlas_rect.size; //adapt to atlas size
switch (j) {
case 0: {
@@ -6281,162 +6322,198 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
r_directional_light_count++;
} break;
- case RS::LIGHT_SPOT:
case RS::LIGHT_OMNI: {
- if (light_count >= cluster.max_lights) {
+ if (cluster.omni_light_count >= cluster.max_lights) {
continue;
}
- Transform light_transform = light_instance_get_base_transform(li);
+ cluster.omni_light_sort[cluster.omni_light_count].instance = li;
+ cluster.omni_light_sort[cluster.omni_light_count].depth = camera_plane.distance_to(li->transform.origin);
+ cluster.omni_light_count++;
+ } break;
+ case RS::LIGHT_SPOT: {
+ if (cluster.spot_light_count >= cluster.max_lights) {
+ continue;
+ }
- Cluster::LightData &light_data = cluster.lights[light_count];
- cluster.lights_instances[light_count] = li;
+ cluster.spot_light_sort[cluster.spot_light_count].instance = li;
+ cluster.spot_light_sort[cluster.spot_light_count].depth = camera_plane.distance_to(li->transform.origin);
+ cluster.spot_light_count++;
+ } break;
+ }
- float sign = storage->light_is_negative(base) ? -1 : 1;
- Color linear_col = storage->light_get_color(base).to_linear();
+ li->last_pass = RSG::rasterizer->get_frame_number();
+ }
- light_data.attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION);
+ if (cluster.omni_light_count) {
+ SortArray<Cluster::InstanceSort<LightInstance>> sorter;
+ sorter.sort(cluster.omni_light_sort, cluster.omni_light_count);
+ }
- float energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI;
+ if (cluster.spot_light_count) {
+ SortArray<Cluster::InstanceSort<LightInstance>> sorter;
+ sorter.sort(cluster.spot_light_sort, cluster.spot_light_count);
+ }
- light_data.color[0] = linear_col.r * energy;
- light_data.color[1] = linear_col.g * energy;
- light_data.color[2] = linear_col.b * energy;
- light_data.specular_amount = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR);
+ ShadowAtlas *shadow_atlas = nullptr;
- float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE));
- light_data.inv_radius = 1.0 / radius;
+ if (p_shadow_atlas.is_valid() && p_using_shadows) {
+ shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas);
+ }
- Vector3 pos = p_camera_inverse_transform.xform(light_transform.origin);
+ for (uint32_t i = 0; i < (cluster.omni_light_count + cluster.spot_light_count); i++) {
+ uint32_t index = (i < cluster.omni_light_count) ? i : i - (cluster.omni_light_count);
+ Cluster::LightData &light_data = (i < cluster.omni_light_count) ? cluster.omni_lights[index] : cluster.spot_lights[index];
+ RS::LightType type = (i < cluster.omni_light_count) ? RS::LIGHT_OMNI : RS::LIGHT_SPOT;
+ LightInstance *li = (i < cluster.omni_light_count) ? cluster.omni_light_sort[index].instance : cluster.spot_light_sort[index].instance;
+ RID base = li->light;
- light_data.position[0] = pos.x;
- light_data.position[1] = pos.y;
- light_data.position[2] = pos.z;
+ cluster.lights_instances[i] = li->self;
- Vector3 direction = p_camera_inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, -1))).normalized();
+ Transform light_transform = li->transform;
- light_data.direction[0] = direction.x;
- light_data.direction[1] = direction.y;
- light_data.direction[2] = direction.z;
+ float sign = storage->light_is_negative(base) ? -1 : 1;
+ Color linear_col = storage->light_get_color(base).to_linear();
- float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE);
+ light_data.attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION);
- light_data.size = size;
+ float energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI;
- light_data.cone_attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION);
- float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE);
- light_data.cone_angle = Math::cos(Math::deg2rad(spot_angle));
+ light_data.color[0] = linear_col.r * energy;
+ light_data.color[1] = linear_col.g * energy;
+ light_data.color[2] = linear_col.b * energy;
+ light_data.specular_amount = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR) * 2.0;
- light_data.mask = storage->light_get_cull_mask(base);
+ float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE));
+ light_data.inv_radius = 1.0 / radius;
- light_data.atlas_rect[0] = 0;
- light_data.atlas_rect[1] = 0;
- light_data.atlas_rect[2] = 0;
- light_data.atlas_rect[3] = 0;
+ Vector3 pos = inverse_transform.xform(light_transform.origin);
- RID projector = storage->light_get_projector(base);
+ light_data.position[0] = pos.x;
+ light_data.position[1] = pos.y;
+ light_data.position[2] = pos.z;
- if (projector.is_valid()) {
- Rect2 rect = storage->decal_atlas_get_texture_rect(projector);
+ Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, -1))).normalized();
- if (type == RS::LIGHT_SPOT) {
- light_data.projector_rect[0] = rect.position.x;
- light_data.projector_rect[1] = rect.position.y + rect.size.height; //flip because shadow is flipped
- light_data.projector_rect[2] = rect.size.width;
- light_data.projector_rect[3] = -rect.size.height;
- } else {
- light_data.projector_rect[0] = rect.position.x;
- light_data.projector_rect[1] = rect.position.y;
- light_data.projector_rect[2] = rect.size.width;
- light_data.projector_rect[3] = rect.size.height * 0.5; //used by dp, so needs to be half
- }
- } else {
- light_data.projector_rect[0] = 0;
- light_data.projector_rect[1] = 0;
- light_data.projector_rect[2] = 0;
- light_data.projector_rect[3] = 0;
- }
+ light_data.direction[0] = direction.x;
+ light_data.direction[1] = direction.y;
+ light_data.direction[2] = direction.z;
- if (p_using_shadows && p_shadow_atlas.is_valid() && shadow_atlas_owns_light_instance(p_shadow_atlas, li)) {
- // fill in the shadow information
+ float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE);
- light_data.shadow_enabled = true;
+ light_data.size = size;
- if (type == RS::LIGHT_SPOT) {
- light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0);
- float shadow_texel_size = Math::tan(Math::deg2rad(spot_angle)) * radius * 2.0;
- shadow_texel_size *= light_instance_get_shadow_texel_size(li, p_shadow_atlas);
+ light_data.cone_attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION);
+ float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE);
+ light_data.cone_angle = Math::cos(Math::deg2rad(spot_angle));
- light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size;
+ light_data.mask = storage->light_get_cull_mask(base);
- } else { //omni
- light_data.shadow_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0;
- float shadow_texel_size = light_instance_get_shadow_texel_size(li, p_shadow_atlas);
- light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size * 2.0; // applied in -1 .. 1 space
- }
+ light_data.atlas_rect[0] = 0;
+ light_data.atlas_rect[1] = 0;
+ light_data.atlas_rect[2] = 0;
+ light_data.atlas_rect[3] = 0;
- light_data.transmittance_bias = storage->light_get_transmittance_bias(base);
+ RID projector = storage->light_get_projector(base);
- Rect2 rect = light_instance_get_shadow_atlas_rect(li, p_shadow_atlas);
+ if (projector.is_valid()) {
+ Rect2 rect = storage->decal_atlas_get_texture_rect(projector);
- light_data.atlas_rect[0] = rect.position.x;
- light_data.atlas_rect[1] = rect.position.y;
- light_data.atlas_rect[2] = rect.size.width;
- light_data.atlas_rect[3] = rect.size.height;
+ if (type == RS::LIGHT_SPOT) {
+ light_data.projector_rect[0] = rect.position.x;
+ light_data.projector_rect[1] = rect.position.y + rect.size.height; //flip because shadow is flipped
+ light_data.projector_rect[2] = rect.size.width;
+ light_data.projector_rect[3] = -rect.size.height;
+ } else {
+ light_data.projector_rect[0] = rect.position.x;
+ light_data.projector_rect[1] = rect.position.y;
+ light_data.projector_rect[2] = rect.size.width;
+ light_data.projector_rect[3] = rect.size.height * 0.5; //used by dp, so needs to be half
+ }
+ } else {
+ light_data.projector_rect[0] = 0;
+ light_data.projector_rect[1] = 0;
+ light_data.projector_rect[2] = 0;
+ light_data.projector_rect[3] = 0;
+ }
- light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR);
- light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base);
+ if (shadow_atlas && shadow_atlas->shadow_owners.has(li->self)) {
+ // fill in the shadow information
- if (type == RS::LIGHT_OMNI) {
- light_data.atlas_rect[3] *= 0.5; //one paraboloid on top of another
- Transform proj = (p_camera_inverse_transform * light_transform).inverse();
+ light_data.shadow_enabled = true;
- RendererStorageRD::store_transform(proj, light_data.shadow_matrix);
+ if (type == RS::LIGHT_SPOT) {
+ light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0);
+ float shadow_texel_size = Math::tan(Math::deg2rad(spot_angle)) * radius * 2.0;
+ shadow_texel_size *= light_instance_get_shadow_texel_size(li->self, p_shadow_atlas);
- if (size > 0.0) {
- light_data.soft_shadow_size = size;
- } else {
- light_data.soft_shadow_size = 0.0;
- light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF
- }
+ light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size;
- } else if (type == RS::LIGHT_SPOT) {
- Transform modelview = (p_camera_inverse_transform * light_transform).inverse();
- CameraMatrix bias;
- bias.set_light_bias();
+ } else { //omni
+ light_data.shadow_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0;
+ float shadow_texel_size = light_instance_get_shadow_texel_size(li->self, p_shadow_atlas);
+ light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size * 2.0; // applied in -1 .. 1 space
+ }
- CameraMatrix shadow_mtx = bias * light_instance_get_shadow_camera(li, 0) * modelview;
- RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrix);
+ light_data.transmittance_bias = storage->light_get_transmittance_bias(base);
- if (size > 0.0) {
- CameraMatrix cm = light_instance_get_shadow_camera(li, 0);
- float half_np = cm.get_z_near() * Math::tan(Math::deg2rad(spot_angle));
- light_data.soft_shadow_size = (size * 0.5 / radius) / (half_np / cm.get_z_near()) * rect.size.width;
- } else {
- light_data.soft_shadow_size = 0.0;
- light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF
- }
- }
+ Rect2 rect = light_instance_get_shadow_atlas_rect(li->self, p_shadow_atlas);
+
+ light_data.atlas_rect[0] = rect.position.x;
+ light_data.atlas_rect[1] = rect.position.y;
+ light_data.atlas_rect[2] = rect.size.width;
+ light_data.atlas_rect[3] = rect.size.height;
+
+ light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR);
+ light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base);
+
+ if (type == RS::LIGHT_OMNI) {
+ light_data.atlas_rect[3] *= 0.5; //one paraboloid on top of another
+ Transform proj = (inverse_transform * light_transform).inverse();
+
+ RendererStorageRD::store_transform(proj, light_data.shadow_matrix);
+
+ if (size > 0.0) {
+ light_data.soft_shadow_size = size;
} else {
- light_data.shadow_enabled = false;
+ light_data.soft_shadow_size = 0.0;
+ light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF
}
- light_instance_set_index(li, light_count);
+ } else if (type == RS::LIGHT_SPOT) {
+ Transform modelview = (inverse_transform * light_transform).inverse();
+ CameraMatrix bias;
+ bias.set_light_bias();
- cluster.builder.add_light(type == RS::LIGHT_SPOT ? LightClusterBuilder::LIGHT_TYPE_SPOT : LightClusterBuilder::LIGHT_TYPE_OMNI, light_transform, radius, spot_angle);
+ CameraMatrix shadow_mtx = bias * li->shadow_transform[0].camera * modelview;
+ RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrix);
- light_count++;
- r_positional_light_count++;
- } break;
+ if (size > 0.0) {
+ CameraMatrix cm = li->shadow_transform[0].camera;
+ float half_np = cm.get_z_near() * Math::tan(Math::deg2rad(spot_angle));
+ light_data.soft_shadow_size = (size * 0.5 / radius) / (half_np / cm.get_z_near()) * rect.size.width;
+ } else {
+ light_data.soft_shadow_size = 0.0;
+ light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF
+ }
+ }
+ } else {
+ light_data.shadow_enabled = false;
}
- light_instance_set_render_pass(li, RSG::rasterizer->get_frame_number());
+ li->light_index = index;
+
+ current_cluster_builder->add_light(type == RS::LIGHT_SPOT ? ClusterBuilderRD::LIGHT_TYPE_SPOT : ClusterBuilderRD::LIGHT_TYPE_OMNI, light_transform, radius, spot_angle);
- //update UBO for forward rendering, blit to texture for clustered
+ r_positional_light_count++;
}
- if (light_count) {
- RD::get_singleton()->buffer_update(cluster.light_buffer, 0, sizeof(Cluster::LightData) * light_count, cluster.lights, true);
+ if (cluster.omni_light_count) {
+ RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights, true);
+ }
+
+ if (cluster.spot_light_count) {
+ RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights, true);
}
if (r_directional_light_count) {
@@ -6449,18 +6526,26 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const
uv_xform.basis.scale(Vector3(2.0, 1.0, 2.0));
uv_xform.origin = Vector3(-1.0, 0.0, -1.0);
- uint32_t decal_count = MIN((uint32_t)p_decals.size(), cluster.max_decals);
- int idx = 0;
+ uint32_t decal_count = p_decals.size();
+
+ cluster.decal_count = 0;
+
for (uint32_t i = 0; i < decal_count; i++) {
- RID di = p_decals[i];
- RID decal = decal_instance_get_base(di);
+ if (cluster.decal_count == cluster.max_decals) {
+ break;
+ }
+
+ DecalInstance *di = decal_instance_owner.getornull(p_decals[i]);
+ if (!di) {
+ continue;
+ }
+ RID decal = di->decal;
- Transform xform = decal_instance_get_transform(di);
+ Transform xform = di->transform;
- float fade = 1.0;
+ real_t distance = -p_camera_inverse_xform.xform(xform.origin).z;
if (storage->decal_is_distance_fade_enabled(decal)) {
- real_t distance = -p_camera_inverse_xform.xform(xform.origin).z;
float fade_begin = storage->decal_get_distance_fade_begin(decal);
float fade_length = storage->decal_get_distance_fade_length(decal);
@@ -6468,18 +6553,43 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const
if (distance > fade_begin + fade_length) {
continue; // do not use this decal, its invisible
}
+ }
+ }
+ cluster.decal_sort[cluster.decal_count].instance = di;
+ cluster.decal_sort[cluster.decal_count].depth = distance;
+ cluster.decal_count++;
+ }
+
+ if (cluster.decal_count > 0) {
+ SortArray<Cluster::InstanceSort<DecalInstance>> sort_array;
+ sort_array.sort(cluster.decal_sort, cluster.decal_count);
+ }
+
+ for (uint32_t i = 0; i < cluster.decal_count; i++) {
+ DecalInstance *di = cluster.decal_sort[i].instance;
+ RID decal = di->decal;
+
+ Transform xform = di->transform;
+ float fade = 1.0;
+
+ if (storage->decal_is_distance_fade_enabled(decal)) {
+ real_t distance = -p_camera_inverse_xform.xform(xform.origin).z;
+ float fade_begin = storage->decal_get_distance_fade_begin(decal);
+ float fade_length = storage->decal_get_distance_fade_length(decal);
+
+ if (distance > fade_begin) {
fade = 1.0 - (distance - fade_begin) / fade_length;
}
}
- Cluster::DecalData &dd = cluster.decals[idx];
+ Cluster::DecalData &dd = cluster.decals[i];
Vector3 decal_extents = storage->decal_get_extents(decal);
Transform scale_xform;
scale_xform.basis.scale(Vector3(decal_extents.x, decal_extents.y, decal_extents.z));
- Transform to_decal_xform = (p_camera_inverse_xform * decal_instance_get_transform(di) * scale_xform * uv_xform).affine_inverse();
+ Transform to_decal_xform = (p_camera_inverse_xform * di->transform * scale_xform * uv_xform).affine_inverse();
RendererStorageRD::store_transform(to_decal_xform, dd.xform);
Vector3 normal = xform.basis.get_axis(Vector3::AXIS_Y).normalized();
@@ -6564,13 +6674,11 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const
dd.upper_fade = storage->decal_get_upper_fade(decal);
dd.lower_fade = storage->decal_get_lower_fade(decal);
- cluster.builder.add_decal(xform, decal_extents);
-
- idx++;
+ current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_DECAL, xform, decal_extents);
}
- if (idx > 0) {
- RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * idx, cluster.decals, true);
+ if (cluster.decal_count > 0) {
+ RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals, true);
}
}
@@ -6753,8 +6861,10 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
cluster.lights_shadow_rect_cache_count = 0;
- for (int i = 0; i < p_positional_light_count; i++) {
- if (cluster.lights[i].shadow_enabled != 0) {
+ for (uint32_t i = 0; i < cluster.omni_light_count + cluster.spot_light_count; i++) {
+ Cluster::LightData &ld = i < cluster.omni_light_count ? cluster.omni_lights[i] : cluster.spot_lights[i - cluster.omni_light_count];
+
+ if (ld.shadow_enabled != 0) {
RID li = cluster.lights_instances[i];
ERR_CONTINUE(!shadow_atlas->shadow_owners.has(li));
@@ -6792,7 +6902,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
cluster.lights_shadow_rect_cache_count++;
- if (cluster.lights_shadow_rect_cache_count == cluster.max_lights) {
+ if (cluster.lights_shadow_rect_cache_count == cluster.max_lights * 2) {
break; //light limit reached
}
}
@@ -6889,23 +6999,22 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 3;
- u.ids.push_back(get_positional_light_buffer());
+ u.ids.push_back(get_omni_light_buffer());
uniforms.push_back(u);
}
-
{
RD::Uniform u;
- u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
+ u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 4;
- u.ids.push_back(get_directional_light_buffer());
+ u.ids.push_back(get_spot_light_buffer());
uniforms.push_back(u);
}
{
RD::Uniform u;
- u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+ u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
u.binding = 5;
- u.ids.push_back(get_cluster_builder_texture());
+ u.ids.push_back(get_directional_light_buffer());
uniforms.push_back(u);
}
@@ -6913,7 +7022,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 6;
- u.ids.push_back(get_cluster_builder_indices_buffer());
+ u.ids.push_back(rb->cluster_builder->get_cluster_buffer());
uniforms.push_back(u);
}
@@ -6973,6 +7082,13 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED));
uniforms.push_back(u);
}
+ {
+ RD::Uniform u;
+ u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
+ u.binding = 14;
+ u.ids.push_back(volumetric_fog.params_ubo);
+ uniforms.push_back(u);
+ }
rb->volumetric_fog->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, 0), 0);
@@ -7018,7 +7134,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
rb->volumetric_fog->length = env->volumetric_fog_length;
rb->volumetric_fog->spread = env->volumetric_fog_detail_spread;
- VolumetricFogShader::PushConstant push_constant;
+ VolumetricFogShader::ParamsUBO params;
Vector2 frustum_near_size = p_cam_projection.get_viewport_half_extents();
Vector2 frustum_far_size = p_cam_projection.get_far_plane_half_extents();
@@ -7034,51 +7150,71 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
fog_near_size = Vector2();
}
- push_constant.fog_frustum_size_begin[0] = fog_near_size.x;
- push_constant.fog_frustum_size_begin[1] = fog_near_size.y;
+ params.fog_frustum_size_begin[0] = fog_near_size.x;
+ params.fog_frustum_size_begin[1] = fog_near_size.y;
- push_constant.fog_frustum_size_end[0] = fog_far_size.x;
- push_constant.fog_frustum_size_end[1] = fog_far_size.y;
+ params.fog_frustum_size_end[0] = fog_far_size.x;
+ params.fog_frustum_size_end[1] = fog_far_size.y;
- push_constant.z_near = z_near;
- push_constant.z_far = z_far;
+ params.z_near = z_near;
+ params.z_far = z_far;
- push_constant.fog_frustum_end = fog_end;
+ params.fog_frustum_end = fog_end;
- push_constant.fog_volume_size[0] = rb->volumetric_fog->width;
- push_constant.fog_volume_size[1] = rb->volumetric_fog->height;
- push_constant.fog_volume_size[2] = rb->volumetric_fog->depth;
+ params.fog_volume_size[0] = rb->volumetric_fog->width;
+ params.fog_volume_size[1] = rb->volumetric_fog->height;
+ params.fog_volume_size[2] = rb->volumetric_fog->depth;
- push_constant.directional_light_count = p_directional_light_count;
+ params.directional_light_count = p_directional_light_count;
Color light = env->volumetric_fog_light.to_linear();
- push_constant.light_energy[0] = light.r * env->volumetric_fog_light_energy;
- push_constant.light_energy[1] = light.g * env->volumetric_fog_light_energy;
- push_constant.light_energy[2] = light.b * env->volumetric_fog_light_energy;
- push_constant.base_density = env->volumetric_fog_density;
+ params.light_energy[0] = light.r * env->volumetric_fog_light_energy;
+ params.light_energy[1] = light.g * env->volumetric_fog_light_energy;
+ params.light_energy[2] = light.b * env->volumetric_fog_light_energy;
+ params.base_density = env->volumetric_fog_density;
+
+ params.detail_spread = env->volumetric_fog_detail_spread;
+ params.gi_inject = env->volumetric_fog_gi_inject;
+
+ params.cam_rotation[0] = p_cam_transform.basis[0][0];
+ params.cam_rotation[1] = p_cam_transform.basis[1][0];
+ params.cam_rotation[2] = p_cam_transform.basis[2][0];
+ params.cam_rotation[3] = 0;
+ params.cam_rotation[4] = p_cam_transform.basis[0][1];
+ params.cam_rotation[5] = p_cam_transform.basis[1][1];
+ params.cam_rotation[6] = p_cam_transform.basis[2][1];
+ params.cam_rotation[7] = 0;
+ params.cam_rotation[8] = p_cam_transform.basis[0][2];
+ params.cam_rotation[9] = p_cam_transform.basis[1][2];
+ params.cam_rotation[10] = p_cam_transform.basis[2][2];
+ params.cam_rotation[11] = 0;
+ params.filter_axis = 0;
+ params.max_gi_probes = env->volumetric_fog_gi_inject > 0.001 ? p_gi_probe_count : 0;
- push_constant.detail_spread = env->volumetric_fog_detail_spread;
- push_constant.gi_inject = env->volumetric_fog_gi_inject;
+ {
+ uint32_t cluster_size = rb->cluster_builder->get_cluster_size();
+ params.cluster_shift = get_shift_from_power_of_2(cluster_size);
- push_constant.cam_rotation[0] = p_cam_transform.basis[0][0];
- push_constant.cam_rotation[1] = p_cam_transform.basis[1][0];
- push_constant.cam_rotation[2] = p_cam_transform.basis[2][0];
- push_constant.cam_rotation[3] = 0;
- push_constant.cam_rotation[4] = p_cam_transform.basis[0][1];
- push_constant.cam_rotation[5] = p_cam_transform.basis[1][1];
- push_constant.cam_rotation[6] = p_cam_transform.basis[2][1];
- push_constant.cam_rotation[7] = 0;
- push_constant.cam_rotation[8] = p_cam_transform.basis[0][2];
- push_constant.cam_rotation[9] = p_cam_transform.basis[1][2];
- push_constant.cam_rotation[10] = p_cam_transform.basis[2][2];
- push_constant.cam_rotation[11] = 0;
- push_constant.filter_axis = 0;
- push_constant.max_gi_probes = env->volumetric_fog_gi_inject > 0.001 ? p_gi_probe_count : 0;
+ uint32_t cluster_screen_width = (rb->width - 1) / cluster_size + 1;
+ uint32_t cluster_screen_height = (rb->height - 1) / cluster_size + 1;
+ params.cluster_type_size = cluster_screen_width * cluster_screen_height * (32 + 32);
+ params.cluster_width = cluster_screen_width;
+ params.max_cluster_element_count_div_32 = max_cluster_elements / 32;
+
+ params.screen_size[0] = rb->width;
+ params.screen_size[1] = rb->height;
+ }
/* Vector2 dssize = directional_shadow_get_size();
push_constant.directional_shadow_pixel_size[0] = 1.0 / dssize.x;
push_constant.directional_shadow_pixel_size[1] = 1.0 / dssize.y;
*/
+
+ RENDER_TIMESTAMP(">Volumetric Fog");
+
+ RENDER_TIMESTAMP("Render Fog");
+ RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params, true);
+
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
bool use_filter = volumetric_fog_filter_active;
@@ -7086,38 +7222,48 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[using_sdfgi ? VOLUMETRIC_FOG_SHADER_DENSITY_WITH_SDFGI : VOLUMETRIC_FOG_SHADER_DENSITY]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0);
+
if (using_sdfgi) {
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1);
}
- RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 4, 4, 4);
RD::get_singleton()->compute_list_add_barrier(compute_list);
if (use_filter) {
+ RENDER_TIMESTAMP("Filter Fog");
+
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0);
- RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1);
- RD::get_singleton()->compute_list_add_barrier(compute_list);
+ RD::get_singleton()->compute_list_end();
+ //need restart for buffer update
- push_constant.filter_axis = 1;
+ params.filter_axis = 1;
+ RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params, true);
+ compute_list = RD::get_singleton()->compute_list_begin();
+ RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set2, 0);
- RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant));
+ if (using_sdfgi) {
+ RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1);
+ }
RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list);
}
+ RENDER_TIMESTAMP("Integrate Fog");
+
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FOG]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0);
- RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1, 8, 8, 1);
RD::get_singleton()->compute_list_end();
+
+ RENDER_TIMESTAMP("<Volumetric Fog");
}
void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) {
@@ -7150,7 +7296,24 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &
gi_probes = &empty;
}
- cluster.builder.begin(p_cam_transform.affine_inverse(), p_cam_projection); //prepare cluster
+ if (render_buffers_owner.owns(p_render_buffers)) {
+ RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers);
+ current_cluster_builder = rb->cluster_builder;
+ } else if (reflection_probe_instance_owner.owns(p_reflection_probe)) {
+ ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflection_probe);
+ ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas);
+ if (!ra) {
+ ERR_PRINT("reflection probe has no reflection atlas! Bug?");
+ current_cluster_builder = nullptr;
+ } else {
+ current_cluster_builder = ra->cluster_builder;
+ }
+ } else {
+ ERR_PRINT("No cluster builder, bug"); //should never happen, will crash
+ current_cluster_builder = nullptr;
+ }
+
+ current_cluster_builder->begin(p_cam_transform, p_cam_projection, !p_reflection_probe.is_valid());
bool using_shadows = true;
@@ -7165,12 +7328,15 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &
uint32_t directional_light_count = 0;
uint32_t positional_light_count = 0;
- _setup_lights(*lights, p_cam_transform.affine_inverse(), p_shadow_atlas, using_shadows, directional_light_count, positional_light_count);
+ _setup_lights(*lights, p_cam_transform, p_shadow_atlas, using_shadows, directional_light_count, positional_light_count);
_setup_decals(p_decals, p_cam_transform.affine_inverse());
- cluster.builder.bake_cluster(); //bake to cluster
+
+ current_cluster_builder->bake_cluster();
uint32_t gi_probe_count = 0;
- _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count);
+ if (p_render_buffers.is_valid()) {
+ _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count);
+ }
if (p_render_buffers.is_valid()) {
bool directional_shadows = false;
@@ -7183,9 +7349,30 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &
_update_volumetric_fog(p_render_buffers, p_environment, p_cam_projection, p_cam_transform, p_shadow_atlas, directional_light_count, directional_shadows, positional_light_count, gi_probe_count);
}
- _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold);
+ _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold);
if (p_render_buffers.is_valid()) {
+ if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES) {
+ ClusterBuilderRD::ElementType elem_type = ClusterBuilderRD::ELEMENT_TYPE_MAX;
+ switch (debug_draw) {
+ case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS:
+ elem_type = ClusterBuilderRD::ELEMENT_TYPE_OMNI_LIGHT;
+ break;
+ case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS:
+ elem_type = ClusterBuilderRD::ELEMENT_TYPE_SPOT_LIGHT;
+ break;
+ case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS:
+ elem_type = ClusterBuilderRD::ELEMENT_TYPE_DECAL;
+ break;
+ case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES:
+ elem_type = ClusterBuilderRD::ELEMENT_TYPE_REFLECTION_PROBE;
+ break;
+ default: {
+ }
+ }
+ current_cluster_builder->debug(elem_type);
+ }
+
RENDER_TIMESTAMP("Tonemap");
_render_buffers_post_process_and_tonemap(p_render_buffers, p_environment, p_camera_effects, p_cam_projection);
@@ -7846,6 +8033,9 @@ bool RendererSceneRenderRD::free(RID p_rid) {
if (rb->volumetric_fog) {
_volumetric_fog_erase(rb);
}
+ if (rb->cluster_builder) {
+ memdelete(rb->cluster_builder);
+ }
render_buffers_owner.free(p_rid);
} else if (environment_owner.owns(p_rid)) {
//not much to delete, just free it
@@ -7855,6 +8045,10 @@ bool RendererSceneRenderRD::free(RID p_rid) {
camera_effects_owner.free(p_rid);
} else if (reflection_atlas_owner.owns(p_rid)) {
reflection_atlas_set_size(p_rid, 0, 0);
+ ReflectionAtlas *ra = reflection_atlas_owner.getornull(p_rid);
+ if (ra->cluster_builder) {
+ memdelete(ra->cluster_builder);
+ }
reflection_atlas_owner.free(p_rid);
} else if (reflection_probe_instance_owner.owns(p_rid)) {
//not much to delete, just free it
@@ -8073,20 +8267,17 @@ void RendererSceneRenderRD::sdfgi_set_debug_probe_select(const Vector3 &p_positi
RendererSceneRenderRD *RendererSceneRenderRD::singleton = nullptr;
-RID RendererSceneRenderRD::get_cluster_builder_texture() {
- return cluster.builder.get_cluster_texture();
-}
-
-RID RendererSceneRenderRD::get_cluster_builder_indices_buffer() {
- return cluster.builder.get_cluster_indices_buffer();
-}
-
RID RendererSceneRenderRD::get_reflection_probe_buffer() {
return cluster.reflection_buffer;
}
-RID RendererSceneRenderRD::get_positional_light_buffer() {
- return cluster.light_buffer;
+//Returns the RID of the storage buffer held in cluster.omni_light_buffer.
+RID RendererSceneRenderRD::get_omni_light_buffer() {
+ return cluster.omni_light_buffer;
+}
+
+//Returns the RID of the storage buffer held in cluster.spot_light_buffer.
+RID RendererSceneRenderRD::get_spot_light_buffer() {
+ return cluster.spot_light_buffer;
}
+
RID RendererSceneRenderRD::get_directional_light_buffer() {
return cluster.directional_light_buffer;
}
@@ -8102,6 +8293,8 @@ bool RendererSceneRenderRD::is_low_end() const {
}
RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
+ max_cluster_elements = GLOBAL_GET("rendering/cluster_builder/max_clustered_elements");
+
storage = p_storage;
singleton = this;
@@ -8436,11 +8629,15 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
sdfgi_shader.integrate_default_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1);
}
}
+ //GK
{
//calculate tables
String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n";
Vector<String> gi_modes;
- gi_modes.push_back("");
+ //one variant per GI::Mode, in enum order; the define names must match the #ifdef checks in gi.glsl (USE_SDFGI / USE_GIPROBES)
+ gi_modes.push_back("\n#define USE_GIPROBES\n");
+ gi_modes.push_back("\n#define USE_SDFGI\n");
+ gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_GIPROBES\n");
+
gi.shader.initialize(gi_modes, defines);
gi.shader_version = gi.shader.version_create();
for (int i = 0; i < GI::MODE_MAX; i++) {
@@ -8484,30 +8681,29 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
default_giprobe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GI::GIProbeData) * RenderBuffers::MAX_GIPROBES);
}
- //cluster setup
- uint32_t uniform_max_size = RD::get_singleton()->limit_get(RD::LIMIT_MAX_UNIFORM_BUFFER_SIZE);
-
{ //reflections
- uint32_t reflection_buffer_size;
- if (uniform_max_size < 65536) {
- reflection_buffer_size = uniform_max_size;
- } else {
- reflection_buffer_size = 65536;
- }
- cluster.max_reflections = reflection_buffer_size / sizeof(Cluster::ReflectionData);
+ cluster.max_reflections = max_cluster_elements;
cluster.reflections = memnew_arr(Cluster::ReflectionData, cluster.max_reflections);
- cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(reflection_buffer_size);
+ cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_reflections); //one slot per reflection; max_decals is only assigned later, in the decals block
+ cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(sizeof(Cluster::ReflectionData) * cluster.max_reflections);
}
{ //lights
- cluster.max_lights = MIN(1024 * 1024, uniform_max_size) / sizeof(Cluster::LightData); //1mb of lights
+ cluster.max_lights = max_cluster_elements;
+
uint32_t light_buffer_size = cluster.max_lights * sizeof(Cluster::LightData);
- cluster.lights = memnew_arr(Cluster::LightData, cluster.max_lights);
- cluster.light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size);
+ cluster.omni_lights = memnew_arr(Cluster::LightData, cluster.max_lights);
+ cluster.omni_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size);
+ cluster.omni_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights);
+ cluster.spot_lights = memnew_arr(Cluster::LightData, cluster.max_lights);
+ cluster.spot_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size);
+ cluster.spot_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights);
//defines += "\n#define MAX_LIGHT_DATA_STRUCTS " + itos(cluster.max_lights) + "\n";
- cluster.lights_instances = memnew_arr(RID, cluster.max_lights);
- cluster.lights_shadow_rect_cache = memnew_arr(Rect2i, cluster.max_lights);
+
+ //used for volumetric fog shrinking
+ cluster.lights_instances = memnew_arr(RID, cluster.max_lights * 2);
+ cluster.lights_shadow_rect_cache = memnew_arr(Rect2i, cluster.max_lights * 2);
cluster.max_directional_lights = MAX_DIRECTIONAL_LIGHTS;
uint32_t directional_light_buffer_size = cluster.max_directional_lights * sizeof(Cluster::DirectionalLightData);
@@ -8516,14 +8712,13 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
}
{ //decals
- cluster.max_decals = MIN(1024 * 1024, uniform_max_size) / sizeof(Cluster::DecalData); //1mb of decals
+ cluster.max_decals = max_cluster_elements;
uint32_t decal_buffer_size = cluster.max_decals * sizeof(Cluster::DecalData);
cluster.decals = memnew_arr(Cluster::DecalData, cluster.max_decals);
+ cluster.decal_sort = memnew_arr(Cluster::InstanceSort<DecalInstance>, cluster.max_decals);
cluster.decal_buffer = RD::get_singleton()->storage_buffer_create(decal_buffer_size);
}
- cluster.builder.setup(16, 8, 24);
-
if (!low_end) {
String defines = "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(cluster.max_directional_lights) + "\n";
Vector<String> volumetric_fog_modes;
@@ -8536,6 +8731,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
for (int i = 0; i < VOLUMETRIC_FOG_SHADER_MAX; i++) {
volumetric_fog.pipelines[i] = RD::get_singleton()->compute_pipeline_create(volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, i));
}
+ volumetric_fog.params_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(VolumetricFogShader::ParamsUBO));
}
{
@@ -8601,6 +8797,7 @@ RendererSceneRenderRD::~RendererSceneRenderRD() {
sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader);
volumetric_fog.shader.version_free(volumetric_fog.shader_version);
+ RD::get_singleton()->free(volumetric_fog.params_ubo);
memdelete_arr(gi_probe_lights);
}
@@ -8622,15 +8819,21 @@ RendererSceneRenderRD::~RendererSceneRenderRD() {
{
RD::get_singleton()->free(cluster.directional_light_buffer);
- RD::get_singleton()->free(cluster.light_buffer);
+ RD::get_singleton()->free(cluster.omni_light_buffer);
+ RD::get_singleton()->free(cluster.spot_light_buffer);
RD::get_singleton()->free(cluster.reflection_buffer);
RD::get_singleton()->free(cluster.decal_buffer);
memdelete_arr(cluster.directional_lights);
- memdelete_arr(cluster.lights);
+ memdelete_arr(cluster.omni_lights);
+ memdelete_arr(cluster.spot_lights);
+ memdelete_arr(cluster.omni_light_sort);
+ memdelete_arr(cluster.spot_light_sort);
memdelete_arr(cluster.lights_shadow_rect_cache);
memdelete_arr(cluster.lights_instances);
memdelete_arr(cluster.reflections);
+ memdelete_arr(cluster.reflection_sort);
memdelete_arr(cluster.decals);
+ memdelete_arr(cluster.decal_sort);
}
RD::get_singleton()->free(shadow_sampler);
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h
index 264434a301..3e69335225 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h
+++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h
@@ -34,7 +34,7 @@
#include "core/templates/local_vector.h"
#include "core/templates/rid_owner.h"
#include "servers/rendering/renderer_compositor.h"
-#include "servers/rendering/renderer_rd/light_cluster_builder.h"
+#include "servers/rendering/renderer_rd/cluster_builder_rd.h"
#include "servers/rendering/renderer_rd/renderer_storage_rd.h"
#include "servers/rendering/renderer_rd/shaders/gi.glsl.gen.h"
#include "servers/rendering/renderer_rd/shaders/giprobe.glsl.gen.h"
@@ -104,12 +104,12 @@ protected:
};
virtual RenderBufferData *_create_render_buffer_data() = 0;
- void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count);
+ void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count);
void _setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform);
void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment);
void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used);
- virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0;
+ virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0;
virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) = 0;
virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0;
virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0;
@@ -341,6 +341,8 @@ private:
};
Vector<Reflection> reflections;
+
+ ClusterBuilderRD *cluster_builder = nullptr;
};
mutable RID_Owner<ReflectionAtlas> reflection_atlas_owner;
@@ -833,6 +835,9 @@ private:
/* RENDER BUFFERS */
+ ClusterBuilderSharedDataRD cluster_builder_shared;
+ ClusterBuilderRD *current_cluster_builder = nullptr;
+
struct SDFGI;
struct VolumetricFog;
@@ -858,6 +863,8 @@ private:
SDFGI *sdfgi = nullptr;
VolumetricFog *volumetric_fog = nullptr;
+ ClusterBuilderRD *cluster_builder = nullptr;
+
//built-in textures used for ping pong image processing and blurring
struct Blur {
RID texture;
@@ -1259,7 +1266,7 @@ private:
uint32_t max_giprobes;
uint32_t high_quality_vct;
- uint32_t use_sdfgi;
+ uint32_t pad2;
uint32_t orthogonal;
float ao_color[3];
@@ -1269,8 +1276,11 @@ private:
};
RID sdfgi_ubo;
- enum {
- MODE_MAX = 1
+ //One entry per gi shader variant; the order matches the gi_modes strings pushed in the constructor (giprobe, sdfgi, both).
+ enum Mode {
+ MODE_GIPROBE,
+ MODE_SDFGI,
+ MODE_COMBINED,
+ MODE_MAX
};
GiShaderRD shader;
@@ -1394,18 +1404,39 @@ private:
float normal_fade;
};
+ //Pairs an instance pointer with a depth value; comparison looks at depth only.
+ template <class T>
+ struct InstanceSort {
+ float depth;
+ T *instance;
+ //true when this entry's depth is strictly smaller than p_sort's
+ bool operator<(const InstanceSort &p_sort) const {
+ return p_sort.depth > depth;
+ }
+ };
+
ReflectionData *reflections;
+ InstanceSort<ReflectionProbeInstance> *reflection_sort;
uint32_t max_reflections;
RID reflection_buffer;
uint32_t max_reflection_probes_per_instance;
+ uint32_t reflection_count = 0;
DecalData *decals;
+ InstanceSort<DecalInstance> *decal_sort;
uint32_t max_decals;
RID decal_buffer;
+ uint32_t decal_count = 0; //zero-initialized like reflection_count and the light counters
- LightData *lights;
+ LightData *omni_lights;
+ LightData *spot_lights;
+
+ InstanceSort<LightInstance> *omni_light_sort;
+ InstanceSort<LightInstance> *spot_light_sort;
uint32_t max_lights;
- RID light_buffer;
+ RID omni_light_buffer;
+ RID spot_light_buffer;
+ uint32_t omni_light_count = 0;
+ uint32_t spot_light_count = 0;
+
RID *lights_instances;
Rect2i *lights_shadow_rect_cache;
uint32_t lights_shadow_rect_cache_count = 0;
@@ -1414,8 +1445,6 @@ private:
uint32_t max_directional_lights;
RID directional_light_buffer;
- LightClusterBuilder builder;
-
} cluster;
struct VolumetricFog {
@@ -1445,7 +1474,7 @@ private:
};
struct VolumetricFogShader {
- struct PushConstant {
+ struct ParamsUBO {
float fog_frustum_size_begin[2];
float fog_frustum_size_end[2];
@@ -1463,13 +1492,21 @@ private:
float detail_spread;
float gi_inject;
uint32_t max_gi_probes;
- uint32_t pad;
+ uint32_t cluster_type_size;
+
+ float screen_size[2];
+ uint32_t cluster_shift;
+ uint32_t cluster_width;
+
+ uint32_t cluster_pad[3];
+ uint32_t max_cluster_element_count_div_32;
float cam_rotation[12];
};
VolumetricFogShaderRD shader;
+ RID params_ubo;
RID shader_version;
RID pipelines[VOLUMETRIC_FOG_SHADER_MAX];
@@ -1494,6 +1531,7 @@ private:
float weight;
};
+ uint32_t max_cluster_elements = 512;
bool low_end = false;
public:
@@ -2002,10 +2040,9 @@ public:
virtual void set_time(double p_time, double p_step);
- RID get_cluster_builder_texture();
- RID get_cluster_builder_indices_buffer();
RID get_reflection_probe_buffer();
- RID get_positional_light_buffer();
+ RID get_omni_light_buffer();
+ RID get_spot_light_buffer();
RID get_directional_light_buffer();
RID get_decal_buffer();
int get_max_directional_lights() const;
diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp
index b74a1083e7..6203f3ba64 100644
--- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp
@@ -7340,6 +7340,7 @@ void RendererStorageRD::_update_decal_atlas() {
tformat.shareable_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB);
decal_atlas.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView());
+ RD::get_singleton()->texture_clear(decal_atlas.texture, Color(0, 0, 0, 0), 0, decal_atlas.mipmaps, 0, 1, true);
{
//create the framebuffer
diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h
index 5ef73f0db8..2fb66ac573 100644
--- a/servers/rendering/renderer_rd/renderer_storage_rd.h
+++ b/servers/rendering/renderer_rd/renderer_storage_rd.h
@@ -95,6 +95,21 @@ public:
p_array[11] = 0;
}
+ //Writes p_mtx into p_array as 12 floats: three groups of four, each holding one basis row followed by the matching origin component.
+ static _FORCE_INLINE_ void store_transform_transposed_3x4(const Transform &p_mtx, float *p_array) {
+ //basis rows go into the first three slots of each group
+ for (int row = 0; row < 3; row++) {
+ float *dst = p_array + row * 4;
+ for (int col = 0; col < 3; col++) {
+ dst[col] = p_mtx.basis.elements[row][col];
+ }
+ }
+ //fourth slot of each group: origin component
+ p_array[3] = p_mtx.origin.x;
+ p_array[7] = p_mtx.origin.y;
+ p_array[11] = p_mtx.origin.z;
+ }
+
static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) {
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub
index deaa9668df..1b0197c1c1 100644
--- a/servers/rendering/renderer_rd/shaders/SCsub
+++ b/servers/rendering/renderer_rd/shaders/SCsub
@@ -44,3 +44,6 @@ if "RD_GLSL" in env["BUILDERS"]:
env.RD_GLSL("particles_copy.glsl")
env.RD_GLSL("sort.glsl")
env.RD_GLSL("skeleton.glsl")
+ env.RD_GLSL("cluster_render.glsl")
+ env.RD_GLSL("cluster_store.glsl")
+ env.RD_GLSL("cluster_debug.glsl")
diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl
new file mode 100644
index 0000000000..70a875192c
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl
@@ -0,0 +1,115 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+//Color ramp indexed by the element count computed in main() (clamped to 0..32).
+const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32
+ vec3(0.14, 0.17, 0.23),
+ vec3(0.24, 0.44, 0.83),
+ vec3(0.23, 0.57, 0.84),
+ vec3(0.22, 0.71, 0.84),
+ vec3(0.22, 0.85, 0.83),
+ vec3(0.21, 0.85, 0.72),
+ vec3(0.21, 0.85, 0.57),
+ vec3(0.20, 0.85, 0.42),
+ vec3(0.20, 0.85, 0.27),
+ vec3(0.27, 0.86, 0.19),
+ vec3(0.51, 0.85, 0.19),
+ vec3(0.57, 0.86, 0.19),
+ vec3(0.62, 0.85, 0.19),
+ vec3(0.67, 0.86, 0.20),
+ vec3(0.73, 0.85, 0.20),
+ vec3(0.78, 0.85, 0.20),
+ vec3(0.83, 0.85, 0.20),
+ vec3(0.85, 0.82, 0.20),
+ vec3(0.85, 0.76, 0.20),
+ vec3(0.85, 0.81, 0.20), // NOTE(review): green 0.81 breaks the otherwise decreasing ramp (0.76 -> 0.65); possibly meant 0.71 - confirm
+ vec3(0.85, 0.65, 0.20),
+ vec3(0.84, 0.60, 0.21),
+ vec3(0.84, 0.56, 0.21),
+ vec3(0.84, 0.51, 0.21),
+ vec3(0.84, 0.46, 0.21),
+ vec3(0.84, 0.41, 0.21),
+ vec3(0.84, 0.36, 0.21),
+ vec3(0.84, 0.31, 0.21),
+ vec3(0.84, 0.27, 0.21),
+ vec3(0.83, 0.22, 0.22),
+ vec3(0.83, 0.22, 0.27),
+ vec3(0.83, 0.22, 0.32),
+ vec3(1.00, 0.63, 0.70));
+layout(push_constant, binding = 0, std430) uniform Params {
+ uvec2 screen_size;
+ uvec2 cluster_screen_size;
+
+ uint cluster_shift;
+ uint cluster_type;
+ float z_near;
+ float z_far;
+
+ bool orthogonal;
+ uint max_cluster_element_count_div_32;
+ uint pad1;
+ uint pad2;
+}
+params;
+
+layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterData {
+ uint data[];
+}
+cluster_data;
+
+layout(rgba16f, set = 0, binding = 2) uniform restrict writeonly image2D screen_buffer;
+layout(set = 0, binding = 3) uniform texture2D depth_buffer;
+layout(set = 0, binding = 4) uniform sampler depth_buffer_sampler;
+
+//Debug visualization: for each screen pixel, counts the elements of params.cluster_type stored for the pixel's cluster and depth slice, then writes a color from usage_gradient.
+void main() {
+ uvec2 screen_pos = gl_GlobalInvocationID.xy;
+ //the dispatch may be larger than the screen; ignore out of range invocations
+ if (any(greaterThanEqual(screen_pos, params.screen_size))) {
+ return;
+ }
+
+ uvec2 cluster_pos = screen_pos >> params.cluster_shift;
+
+ //linear cluster index, moved to the section of the requested element type
+ uint offset = cluster_pos.y * params.cluster_screen_size.x + cluster_pos.x;
+ offset += params.cluster_screen_size.x * params.cluster_screen_size.y * params.cluster_type;
+ //each cluster record holds the element bitfield words followed by 32 per-slice min/max words
+ offset *= (params.max_cluster_element_count_div_32 + 32);
+
+ //depth buffers generally can't be accessed via image API
+ float depth = texelFetch(sampler2D(depth_buffer, depth_buffer_sampler), ivec2(screen_pos), 0).r * 2.0 - 1.0;
+
+ //turn the -1..1 depth back into a linear distance using z_near / z_far
+ if (params.orthogonal) {
+ depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
+ } else {
+ depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near));
+ }
+ depth /= params.z_far;
+
+ //32 depth slices over 0..1; clamped in float before converting to uint
+ uint slice = uint(clamp(floor(depth * 32.0), 0.0, 31.0));
+ uint slice_minmax = cluster_data.data[offset + params.max_cluster_element_count_div_32 + slice];
+ //low 16 bits: first element index, high 16 bits: one past the last element index
+ uint item_min = slice_minmax & 0xFFFF;
+ uint item_max = slice_minmax >> 16;
+
+ //count the bits set in the cluster bitfield that fall inside the slice range
+ uint item_count = 0;
+ for (uint i = 0; i < params.max_cluster_element_count_div_32; i++) {
+ uint slice_bits = cluster_data.data[offset + i];
+ while (slice_bits != 0) {
+ uint bit = findLSB(slice_bits);
+ uint item = i * 32 + bit;
+ if ((item >= item_min && item < item_max)) {
+ item_count++;
+ }
+ slice_bits &= ~(1 << bit);
+ }
+ }
+
+ //the gradient table has 33 entries (0..32)
+ item_count = min(item_count, 32);
+
+ vec3 color = usage_gradient[item_count];
+
+ //darken with the slice index so the depth slices are visible too
+ color = mix(color * 1.2, color * 0.3, float(slice) / 31.0);
+
+ imageStore(screen_buffer, ivec2(screen_pos), vec4(color, 1.0));
+}
diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl
new file mode 100644
index 0000000000..8723ea78e4
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl
@@ -0,0 +1,168 @@
+#[vertex]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(location = 0) in vec3 vertex_attrib;
+
+layout(location = 0) out float depth_interp;
+layout(location = 1) out flat uint element_index;
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ uint base_index;
+ uint pad0;
+ uint pad1;
+ uint pad2;
+}
+params;
+
+layout(set = 0, binding = 1, std140) uniform State {
+ mat4 projection;
+
+ float inv_z_far;
+ uint screen_to_clusters_shift; // shift to obtain coordinates in block indices
+ uint cluster_screen_width; //
+ uint cluster_data_size; // how much data for a single cluster takes
+
+ uint cluster_depth_offset;
+ uint pad0;
+ uint pad1;
+ uint pad2;
+}
+state;
+
+struct RenderElement {
+ uint type; //0-4
+ bool touches_near;
+ bool touches_far;
+ uint original_index;
+ mat3x4 transform_inv;
+ vec3 scale;
+ uint pad;
+};
+
+layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements {
+ RenderElement data[];
+}
+render_elements;
+
+//Vertex stage: scales and transforms the element's vertex, and forwards its -Z distance and element index to the fragment stage.
+void main() {
+ //base_index plus the instance index selects the entry in render_elements
+ element_index = params.base_index + gl_InstanceIndex;
+
+ vec3 vertex = vertex_attrib;
+ vertex *= render_elements.data[element_index].scale;
+
+ //row vector times mat3x4 yields a vec3
+ vertex = vec4(vertex, 1.0) * render_elements.data[element_index].transform_inv;
+ //distance along -Z; the fragment stage scales it by inv_z_far to pick a depth slice
+ depth_interp = -vertex.z;
+
+ gl_Position = state.projection * vec4(vertex, 1.0);
+}
+
+#[fragment]
+
+#version 450
+
+VERSION_DEFINES
+
+#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_vote)
+
+#extension GL_KHR_shader_subgroup_ballot : enable
+#extension GL_KHR_shader_subgroup_arithmetic : enable
+#extension GL_KHR_shader_subgroup_vote : enable
+
+#define USE_SUBGROUPS
+#endif
+
+layout(location = 0) in float depth_interp;
+layout(location = 1) in flat uint element_index;
+
+layout(set = 0, binding = 1, std140) uniform State {
+ mat4 projection;
+ float inv_z_far;
+ uint screen_to_clusters_shift; // shift to obtain coordinates in block indices
+ uint cluster_screen_width; //
+ uint cluster_data_size; // how much data for a single cluster takes
+ uint cluster_depth_offset;
+ uint pad0;
+ uint pad1;
+ uint pad2;
+}
+state;
+
+//cluster data is laid out linearly, each cell contains the following information:
+// - list of bits for every element to mark as used, so (max_elem_count/32)*4 uints
+// - a uint for each element to mark the depth bits used when rendering (0-31)
+
+layout(set = 0, binding = 3, std430) buffer restrict ClusterRender {
+ uint data[];
+}
+cluster_render;
+
+//Fragment stage: marks the rendered element as used in the cluster under this fragment and records which of the 32 depth slices the fragment falls into.
+void main() {
+ //convert from screen to cluster
+ uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift;
+
+ //get linear cluster offset from screen pos
+ uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y;
+ //multiply by data size to position at the beginning of the element list for this cluster
+ cluster_offset *= state.cluster_data_size;
+
+ //find the current element in the list and plot the bit to mark it as used
+ uint usage_write_offset = cluster_offset + (element_index >> 5);
+ uint usage_write_bit = 1 << (element_index & 0x1F);
+
+#ifdef USE_SUBGROUPS
+
+ uint cluster_thread_group_index;
+
+ if (!gl_HelperInvocation) {
+ //http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
+
+ uvec4 mask;
+
+ while (true) {
+ // find the cluster offset of the first active thread
+ // threads that did break; go inactive and no longer count
+ uint first = subgroupBroadcastFirst(cluster_offset);
+ // update the mask for thread that match this cluster
+ mask = subgroupBallot(first == cluster_offset);
+ if (first == cluster_offset) {
+ // This thread belongs to the group of threads that match this offset,
+ // so exit the loop.
+ break;
+ }
+ }
+
+ cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask);
+
+ //only the first thread of each group issues the atomic
+ if (cluster_thread_group_index == 0) {
+ atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
+ }
+ }
+#else
+ if (!gl_HelperInvocation) {
+ atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
+ }
+#endif
+ //find the current element in the depth usage list and mark the current depth as used
+ float unit_depth = depth_interp * state.inv_z_far;
+
+ //clamp in float before converting: converting a negative float to uint is undefined in GLSL
+ uint z_bit = uint(clamp(floor(unit_depth * 32.0), 0.0, 31.0));
+
+ uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index;
+ uint z_write_bit = 1 << z_bit;
+
+#ifdef USE_SUBGROUPS
+ if (!gl_HelperInvocation) {
+ z_write_bit = subgroupOr(z_write_bit); //merge all Zs
+ if (cluster_thread_group_index == 0) {
+ atomicOr(cluster_render.data[z_write_offset], z_write_bit);
+ }
+ }
+#else
+ if (!gl_HelperInvocation) {
+ atomicOr(cluster_render.data[z_write_offset], z_write_bit);
+ }
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl
new file mode 100644
index 0000000000..5be0893c4f
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl
@@ -0,0 +1,119 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ uint cluster_render_data_size; // how much data for a single cluster takes
+ uint max_render_element_count_div_32; //divided by 32
+ uvec2 cluster_screen_size;
+ uint render_element_count_div_32; //divided by 32
+
+ uint max_cluster_element_count_div_32; //divided by 32
+ uint pad1;
+ uint pad2;
+}
+params;
+
+layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender {
+ uint data[];
+}
+cluster_render;
+
+layout(set = 0, binding = 2, std430) buffer restrict ClusterStore {
+ uint data[];
+}
+cluster_store;
+
+struct RenderElement {
+ uint type; //0-4
+ bool touches_near;
+ bool touches_far;
+ uint original_index;
+ mat3x4 transform_inv;
+ vec3 scale;
+ uint pad;
+};
+
+layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements {
+ RenderElement data[];
+}
+render_elements;
+
+//One invocation per cluster: reads the usage and depth bits written by the cluster render pass and converts them into the per-type bitfield plus per-slice min/max records in cluster_store.
+void main() {
+ uvec2 pos = gl_GlobalInvocationID.xy;
+ //the dispatch may be larger than the cluster grid; ignore out of range invocations
+ if (any(greaterThanEqual(pos, params.cluster_screen_size))) {
+ return;
+ }
+
+ //counter for each type of render_element
+
+ //base offset for this cluster
+ uint base_offset = (pos.x + params.cluster_screen_size.x * pos.y);
+ uint src_offset = base_offset * params.cluster_render_data_size;
+
+ uint render_element_offset = 0;
+
+ //check all render_elements and see which one was written to
+ while (render_element_offset < params.render_element_count_div_32) {
+ uint bits = cluster_render.data[src_offset + render_element_offset];
+ while (bits != 0) {
+ //if bits exist, check the render_element
+ uint index_bit = findLSB(bits);
+ uint index = render_element_offset * 32 + index_bit;
+ uint type = render_elements.data[index].type;
+
+ //the per-element depth masks start right after the usage bitfield words
+ uint z_range_offset = src_offset + params.max_render_element_count_div_32 + index;
+ uint z_range = cluster_render.data[z_range_offset];
+
+ //if object was written, z was written, but check just in case
+ if (z_range != 0) { //should always be > 0
+
+ //lowest and highest depth slice touched, as a half-open range
+ uint from_z = findLSB(z_range);
+ uint to_z = findMSB(z_range) + 1;
+
+ if (render_elements.data[index].touches_near) {
+ from_z = 0;
+ }
+
+ if (render_elements.data[index].touches_far) {
+ to_z = 32;
+ }
+
+ // find cluster offset in the buffer used for indexing in the renderer
+ uint dst_offset = (base_offset + type * (params.cluster_screen_size.x * params.cluster_screen_size.y)) * (params.max_cluster_element_count_div_32 + 32);
+
+ uint orig_index = render_elements.data[index].original_index;
+ //widen the [min, max) element range of every Z slice this element covers
+ for (uint i = from_z; i < to_z; i++) {
+ uint slice_ofs = dst_offset + params.max_cluster_element_count_div_32 + i;
+
+ uint minmax = cluster_store.data[slice_ofs];
+
+ if (minmax == 0) {
+ minmax = 0xFFFF; //empty slice: min = 0xFFFF (low 16 bits), max = 0 (high 16 bits)
+ }
+
+ uint elem_min = min(orig_index, minmax & 0xFFFF);
+ uint elem_max = max(orig_index + 1, minmax >> 16); //always store plus one, so zero means range is empty when not written to
+
+ minmax = elem_min | (elem_max << 16);
+ cluster_store.data[slice_ofs] = minmax;
+ }
+
+ uint store_word = orig_index >> 5;
+ uint store_bit = orig_index & 0x1F;
+
+ //finally, set the element's bit in the cluster's bitfield, so the rendering code can reference it
+ cluster_store.data[dst_offset + store_word] |= 1 << store_bit;
+ }
+
+ bits &= ~(1 << index_bit); //clear the bit to continue iterating
+ }
+
+ render_element_offset++;
+ }
+}
diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl
index 8011dadc72..c2965f9874 100644
--- a/servers/rendering/renderer_rd/shaders/gi.glsl
+++ b/servers/rendering/renderer_rd/shaders/gi.glsl
@@ -99,7 +99,7 @@ layout(push_constant, binding = 0, std430) uniform Params {
uint max_giprobes;
bool high_quality_vct;
- bool use_sdfgi;
+ uint pad2;
bool orthogonal;
vec3 ao_color;
@@ -331,7 +331,7 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
}
ambient_light.rgb = diffuse;
-#if 1
+
if (roughness < 0.2) {
vec3 pos_to_uvw = 1.0 / sdfgi.grid_size;
vec4 light_accum = vec4(0.0);
@@ -363,7 +363,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
//ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion
ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell;
}
-
float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade
while (length(ray_pos) < max_distance) {
for (uint i = 0; i < sdfgi.max_cascades; i++) {
@@ -434,8 +433,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
}
}
-#endif
-
reflection_light.rgb = specular;
ambient_light.rgb *= sdfgi.energy;
@@ -621,11 +618,12 @@ void main() {
vec3 reflection = normalize(reflect(normalize(vertex), normal));
- if (params.use_sdfgi) {
- sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light);
- }
+#ifdef USE_SDFGI
+ sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light);
+#endif
- if (params.max_giprobes > 0) {
+#ifdef USE_GIPROBES
+ {
uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg;
roughness *= roughness;
//find arbitrary tangent and bitangent, then build a matrix
@@ -656,6 +654,7 @@ void main() {
ambient_light = amb_accum;
}
}
+#endif
}
imageStore(ambient_buffer, pos, ambient_light);
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl
index 7fa5f7b0fe..c3e7e2acbf 100644
--- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl
@@ -541,7 +541,7 @@ vec3 F0(float metallic, float specular, vec3 albedo) {
return mix(vec3(dielectric), albedo, vec3(metallic));
}
-void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms,
+void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
vec3 backlight,
#endif
@@ -710,7 +710,7 @@ LIGHT_SHADER_CODE
blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI));
float intensity = blinn;
- specular_light += light_color * intensity * attenuation;
+ specular_light += light_color * intensity * attenuation * specular_amount;
#elif defined(SPECULAR_PHONG)
@@ -721,7 +721,7 @@ LIGHT_SHADER_CODE
phong *= (shininess + 8.0) * (1.0 / (8.0 * M_PI));
float intensity = (phong) / max(4.0 * cNdotV * cNdotL, 0.75);
- specular_light += light_color * intensity * attenuation;
+ specular_light += light_color * intensity * attenuation * specular_amount;
#elif defined(SPECULAR_TOON)
@@ -730,7 +730,7 @@ LIGHT_SHADER_CODE
float mid = 1.0 - roughness;
mid *= mid;
float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid;
- diffuse_light += light_color * intensity * attenuation; // write to diffuse_light, as in toon shading you generally want no reflection
+ diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection
#elif defined(SPECULAR_DISABLED)
// none..
@@ -760,7 +760,7 @@ LIGHT_SHADER_CODE
vec3 specular_brdf_NL = cNdotL * D * F * G;
- specular_light += specular_brdf_NL * light_color * attenuation;
+ specular_light += specular_brdf_NL * light_color * attenuation * specular_amount;
#endif
#if defined(LIGHT_CLEARCOAT_USED)
@@ -774,7 +774,7 @@ LIGHT_SHADER_CODE
float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL;
- specular_light += clearcoat_specular_brdf_NL * light_color * attenuation;
+ specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount;
#endif
}
@@ -903,28 +903,28 @@ float get_omni_attenuation(float distance, float inv_range, float decay) {
float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
#ifndef USE_NO_SHADOWS
- if (lights.data[idx].shadow_enabled) {
+ if (omni_lights.data[idx].shadow_enabled) {
// there is a shadowmap
- vec3 light_rel_vec = lights.data[idx].position - vertex;
+ vec3 light_rel_vec = omni_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
vec4 v = vec4(vertex, 1.0);
- vec4 splane = (lights.data[idx].shadow_matrix * v);
+ vec4 splane = (omni_lights.data[idx].shadow_matrix * v);
float shadow_len = length(splane.xyz); //need to remember shadow len from here
{
- vec3 nofs = normal_interp * lights.data[idx].shadow_normal_bias / lights.data[idx].inv_radius;
+ vec3 nofs = normal_interp * omni_lights.data[idx].shadow_normal_bias / omni_lights.data[idx].inv_radius;
nofs *= (1.0 - max(0.0, dot(normalize(light_rel_vec), normalize(normal_interp))));
v.xyz += nofs;
- splane = (lights.data[idx].shadow_matrix * v);
+ splane = (omni_lights.data[idx].shadow_matrix * v);
}
float shadow;
#ifdef USE_SOFT_SHADOWS
- if (lights.data[idx].soft_shadow_size > 0.0) {
+ if (omni_lights.data[idx].soft_shadow_size > 0.0) {
//soft shadow
//find blocker
@@ -944,10 +944,10 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
vec3 tangent = normalize(cross(v0, normal));
vec3 bitangent = normalize(cross(tangent, normal));
- float z_norm = shadow_len * lights.data[idx].inv_radius;
+ float z_norm = shadow_len * omni_lights.data[idx].inv_radius;
- tangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale;
- bitangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale;
+ tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale;
+ bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy;
@@ -955,7 +955,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y;
pos = normalize(pos);
- vec4 uv_rect = lights.data[idx].atlas_rect;
+ vec4 uv_rect = omni_lights.data[idx].atlas_rect;
if (pos.z >= 0.0) {
pos.z += 1.0;
@@ -983,7 +983,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
tangent *= penumbra;
bitangent *= penumbra;
- z_norm -= lights.data[idx].inv_radius * lights.data[idx].shadow_bias;
+ z_norm -= omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias;
shadow = 0.0;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
@@ -991,7 +991,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y;
pos = normalize(pos);
- vec4 uv_rect = lights.data[idx].atlas_rect;
+ vec4 uv_rect = omni_lights.data[idx].atlas_rect;
if (pos.z >= 0.0) {
pos.z += 1.0;
@@ -1016,7 +1016,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
} else {
#endif
splane.xyz = normalize(splane.xyz);
- vec4 clamp_rect = lights.data[idx].atlas_rect;
+ vec4 clamp_rect = omni_lights.data[idx].atlas_rect;
if (splane.z >= 0.0) {
splane.z += 1.0;
@@ -1030,10 +1030,10 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
splane.xy /= splane.z;
splane.xy = splane.xy * 0.5 + 0.5;
- splane.z = (shadow_len - lights.data[idx].shadow_bias) * lights.data[idx].inv_radius;
+ splane.z = (shadow_len - omni_lights.data[idx].shadow_bias) * omni_lights.data[idx].inv_radius;
splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
splane.w = 1.0; //needed? i think it should be 1 already
- shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane);
+ shadow = sample_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane);
#ifdef USE_SOFT_SHADOWS
}
#endif
@@ -1068,17 +1068,17 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
inout float alpha,
#endif
inout vec3 diffuse_light, inout vec3 specular_light) {
- vec3 light_rel_vec = lights.data[idx].position - vertex;
+ vec3 light_rel_vec = omni_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
- float omni_attenuation = get_omni_attenuation(light_length, lights.data[idx].inv_radius, lights.data[idx].attenuation);
+ float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation);
float light_attenuation = omni_attenuation;
- vec3 color = lights.data[idx].color;
+ vec3 color = omni_lights.data[idx].color;
#ifdef USE_SOFT_SHADOWS
float size_A = 0.0;
- if (lights.data[idx].size > 0.0) {
- float t = lights.data[idx].size / max(0.001, light_length);
+ if (omni_lights.data[idx].size > 0.0) {
+ float t = omni_lights.data[idx].size / max(0.001, light_length);
size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
}
#endif
@@ -1087,10 +1087,10 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
float transmittance_z = transmittance_depth; //no transmittance by default
transmittance_color.a *= light_attenuation;
{
- vec4 clamp_rect = lights.data[idx].atlas_rect;
+ vec4 clamp_rect = omni_lights.data[idx].atlas_rect;
//redo shadowmapping, but shrink the model a bit to avoid arctifacts
- vec4 splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0));
+ vec4 splane = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * omni_lights.data[idx].transmittance_bias, 1.0));
shadow_len = length(splane.xyz);
splane = normalize(splane.xyz);
@@ -1104,22 +1104,22 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
splane.xy /= splane.z;
splane.xy = splane.xy * 0.5 + 0.5;
- splane.z = shadow_len * lights.data[idx].inv_radius;
+ splane.z = shadow_len * omni_lights.data[idx].inv_radius;
splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
splane.w = 1.0; //needed? i think it should be 1 already
float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
- transmittance_z = (splane.z - shadow_z) / lights.data[idx].inv_radius;
+ transmittance_z = (splane.z - shadow_z) / omni_lights.data[idx].inv_radius;
}
#endif
#if 0
- if (lights.data[idx].projector_rect != vec4(0.0)) {
- vec3 local_v = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz;
+ if (omni_lights.data[idx].projector_rect != vec4(0.0)) {
+ vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz;
local_v = normalize(local_v);
- vec4 atlas_rect = lights.data[idx].projector_rect;
+ vec4 atlas_rect = omni_lights.data[idx].projector_rect;
if (local_v.z >= 0.0) {
local_v.z += 1.0;
@@ -1136,7 +1136,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
vec2 proj_uv_ddx;
vec2 proj_uv_ddy;
{
- vec3 local_v_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz;
+ vec3 local_v_ddx = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz;
local_v_ddx = normalize(local_v_ddx);
if (local_v_ddx.z >= 0.0) {
@@ -1150,7 +1150,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv;
- vec3 local_v_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz;
+ vec3 local_v_ddy = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz;
local_v_ddy = normalize(local_v_ddy);
if (local_v_ddy.z >= 0.0) {
@@ -1172,7 +1172,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
light_attenuation *= shadow;
- light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms,
+ light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@@ -1204,37 +1204,37 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
#ifndef USE_NO_SHADOWS
- if (lights.data[idx].shadow_enabled) {
- vec3 light_rel_vec = lights.data[idx].position - vertex;
+ if (spot_lights.data[idx].shadow_enabled) {
+ vec3 light_rel_vec = spot_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
- vec3 spot_dir = lights.data[idx].direction;
+ vec3 spot_dir = spot_lights.data[idx].direction;
//there is a shadowmap
vec4 v = vec4(vertex, 1.0);
- v.xyz -= spot_dir * lights.data[idx].shadow_bias;
+ v.xyz -= spot_dir * spot_lights.data[idx].shadow_bias;
- float z_norm = dot(spot_dir, -light_rel_vec) * lights.data[idx].inv_radius;
+ float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius;
float depth_bias_scale = 1.0 / (max(0.0001, z_norm)); //the closer to the light origin, the more you have to offset to reach 1px in the map
- vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * lights.data[idx].shadow_normal_bias * depth_bias_scale;
+ vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * spot_lights.data[idx].shadow_normal_bias * depth_bias_scale;
normal_bias -= spot_dir * dot(spot_dir, normal_bias); //only XY, no Z
v.xyz += normal_bias;
//adjust with bias
- z_norm = dot(spot_dir, v.xyz - lights.data[idx].position) * lights.data[idx].inv_radius;
+ z_norm = dot(spot_dir, v.xyz - spot_lights.data[idx].position) * spot_lights.data[idx].inv_radius;
float shadow;
- vec4 splane = (lights.data[idx].shadow_matrix * v);
+ vec4 splane = (spot_lights.data[idx].shadow_matrix * v);
splane /= splane.w;
#ifdef USE_SOFT_SHADOWS
- if (lights.data[idx].soft_shadow_size > 0.0) {
+ if (spot_lights.data[idx].soft_shadow_size > 0.0) {
//soft shadow
//find blocker
- vec2 shadow_uv = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy;
+ vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy;
float blocker_count = 0.0;
float blocker_average = 0.0;
@@ -1247,11 +1247,11 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr));
}
- float uv_size = lights.data[idx].soft_shadow_size * z_norm * lights.data[idx].soft_shadow_scale;
- vec2 clamp_max = lights.data[idx].atlas_rect.xy + lights.data[idx].atlas_rect.zw;
+ float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale;
+ vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size;
- suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max);
+ suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max);
float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r;
if (d < z_norm) {
blocker_average += d;
@@ -1268,7 +1268,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
shadow = 0.0;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size;
- suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max);
+ suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max);
shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, z_norm, 1.0));
}
@@ -1282,9 +1282,9 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
} else {
#endif
//hard shadow
- vec4 shadow_uv = vec4(splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy, z_norm, 1.0);
+ vec4 shadow_uv = vec4(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, z_norm, 1.0);
- shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv);
+ shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv);
#ifdef USE_SOFT_SHADOWS
}
#endif
@@ -1321,28 +1321,28 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
#endif
inout vec3 diffuse_light,
inout vec3 specular_light) {
- vec3 light_rel_vec = lights.data[idx].position - vertex;
+ vec3 light_rel_vec = spot_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
- float spot_attenuation = get_omni_attenuation(light_length, lights.data[idx].inv_radius, lights.data[idx].attenuation);
- vec3 spot_dir = lights.data[idx].direction;
- float scos = max(dot(-normalize(light_rel_vec), spot_dir), lights.data[idx].cone_angle);
- float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - lights.data[idx].cone_angle));
- spot_attenuation *= 1.0 - pow(spot_rim, lights.data[idx].cone_attenuation);
+ float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation);
+ vec3 spot_dir = spot_lights.data[idx].direction;
+ float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[idx].cone_angle);
+ float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[idx].cone_angle));
+ spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation);
float light_attenuation = spot_attenuation;
- vec3 color = lights.data[idx].color;
- float specular_amount = lights.data[idx].specular_amount;
+ vec3 color = spot_lights.data[idx].color;
+ float specular_amount = spot_lights.data[idx].specular_amount;
#ifdef USE_SOFT_SHADOWS
float size_A = 0.0;
- if (lights.data[idx].size > 0.0) {
- float t = lights.data[idx].size / max(0.001, light_length);
+ if (spot_lights.data[idx].size > 0.0) {
+ float t = spot_lights.data[idx].size / max(0.001, light_length);
size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
}
#endif
/*
- if (lights.data[idx].atlas_rect!=vec4(0.0)) {
+ if (spot_lights.data[idx].atlas_rect!=vec4(0.0)) {
//use projector texture
}
*/
@@ -1351,13 +1351,13 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
float transmittance_z = transmittance_depth;
transmittance_color.a *= light_attenuation;
{
- splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0));
+ splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * spot_lights.data[idx].transmittance_bias, 1.0));
splane /= splane.w;
- splane.xy = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy;
+ splane.xy = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy;
float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
//reconstruct depth
- shadow_z /= lights.data[idx].inv_radius;
+ shadow_z /= spot_lights.data[idx].inv_radius;
//distance to light plane
float z = dot(spot_dir, -light_rel_vec);
transmittance_z = z - shadow_z;
@@ -1366,7 +1366,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
light_attenuation *= shadow;
- light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms,
+ light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@@ -1785,7 +1785,43 @@ vec4 fog_process(vec3 vertex) {
return vec4(fog_color, fog_amount);
}
+// Decodes the packed element range found at cluster_buffer.data[p_offset].
+//
+// p_offset  - index of the packed word inside cluster_buffer.data.
+// item_min  - out: low 16 bits of the packed word.
+// item_max  - out: high 16 bits of the packed word; 0 is treated as "no elements".
+// item_from - out: index of the 32-bit word that contains item_min.
+// item_to   - out: one past the index of the 32-bit word that contains
+//             (item_max - 1), or 0 when item_max is 0, so that a loop over
+//             [item_from, item_to) does not run for an empty range.
+void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) {
+	uint packed_range = cluster_buffer.data[p_offset];
+
+	item_min = packed_range & 0xFFFF;
+	item_max = packed_range >> 16;
+
+	// Convert element indices to 32-bit word indices (32 elements per word).
+	item_from = item_min >> 5;
+
+	if (item_max == 0) {
+		// A max of zero marks an empty range, so (item_max - 1) must not be evaluated.
+		item_to = 0;
+	} else {
+		item_to = ((item_max - 1) >> 5) + 1;
+	}
+}
+
+// Builds a mask of contiguous set bits used to clip word `i` of a cluster
+// bitfield against the element range given by z_min / z_max.
+//
+// i     - index of the 32-bit word being clipped (covers elements i * 32 .. i * 32 + 31).
+// z_min - first element index of the range.
+// z_max - end of the range; the run length is taken as (z_max - z_min).
+//
+// Returns a word whose set bits start at z_min relative to word `i` (clamped
+// to 0..31) and whose run is capped so that it never extends past bit 31.
+//
+// NOTE(review): the run length is derived from the whole range length rather
+// than from the part of the range that falls inside word `i`, so for words
+// other than the one containing z_min the mask can cover more bits than the
+// range strictly requires. Confirm this conservative result is intended.
+uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) {
+	int word_base = int(i) * 32;
+	int first_bit = clamp(int(z_min) - word_base, 0, 31);
+
+	int range_len = int(z_max) - int(z_min);
+	int bit_count = min(range_len, 32 - first_bit);
+
+	return bitfieldInsert(uint(0), uint(0xFFFFFFFF), first_bit, bit_count);
+}
+
+// Shadow smoothing hook.
+//
+// Currently a pass-through: the input value is returned unchanged and the
+// derivative-based smoothing further down is compiled out by `#if 0`.
+// `shadow` is received by value, so the only way a caller can observe a
+// result is through the return value.
+float blur_shadow(float shadow) {
+ return shadow;
+#if 0
+ //disabling for now, will investigate later
+ float interp_shadow = shadow;
+ // Helper invocations get a strongly negative marker value instead of a real shadow term.
+ if (gl_HelperInvocation) {
+ interp_shadow = -4.0; // technically anything below -4 will do but just to make sure
+ }
+
+ // Shift the value by half of its screen-space derivative; the sign of the
+ // shift depends on whether the pixel's x / y coordinate is even or odd.
+ // NOTE(review): presumably this averages the value with its neighbours in the
+ // 2x2 quad - that depends on how dFdx/dFdy are evaluated; confirm before enabling.
+ uvec2 fc2 = uvec2(gl_FragCoord.xy);
+ interp_shadow -= dFdx(interp_shadow) * (float(fc2.x & 1) - 0.5);
+ interp_shadow -= dFdy(interp_shadow) * (float(fc2.y & 1) - 0.5);
+
+ // Only accept the adjusted value when it is non-negative; otherwise keep the input.
+ if (interp_shadow >= 0.0) {
+ shadow = interp_shadow;
+ }
+ return shadow;
 #endif
+}
+
+#endif //!MODE_RENDER_DEPTH
void main() {
#ifdef MODE_DUAL_PARABOLOID
@@ -2003,67 +2039,98 @@ FRAGMENT_SHADER_CODE
#ifndef MODE_RENDER_DEPTH
- uvec4 cluster_cell = texture(usampler3D(cluster_texture, material_samplers[SAMPLER_NEAREST_CLAMP]), vec3(screen_uv, (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)));
+ uvec2 cluster_pos = uvec2(gl_FragCoord.xy) >> scene_data.cluster_shift;
+ uint cluster_offset = (scene_data.cluster_width * cluster_pos.y + cluster_pos.x) * (scene_data.max_cluster_element_count_div_32 + 32);
+
+ uint cluster_z = uint(clamp((-vertex.z / scene_data.z_far) * 32.0, 0.0, 31.0));
+
//used for interpolating anything cluster related
vec3 vertex_ddx = dFdx(vertex);
vec3 vertex_ddy = dFdy(vertex);
{ // process decals
- uint decal_count = cluster_cell.w >> CLUSTER_COUNTER_SHIFT;
- uint decal_pointer = cluster_cell.w & CLUSTER_POINTER_MASK;
+ uint cluster_decal_offset = cluster_offset + scene_data.cluster_type_size * 2;
- //do outside for performance and avoiding arctifacts
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
- for (uint i = 0; i < decal_count; i++) {
- uint decal_index = cluster_data.indices[decal_pointer + i];
- if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) {
- continue; //not masked
- }
+ cluster_get_item_range(cluster_decal_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
- vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz;
- if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) {
- continue; //out of decal
- }
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- //we need ddx/ddy for mipmaps, so simulate them
- vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz;
- vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz;
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_decal_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade);
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint decal_index = 32 * i + bit;
- if (decals.data[decal_index].normal_fade > 0.0) {
- fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5);
- }
+ if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
+
+ vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz;
+ if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) {
+ continue; //out of decal
+ }
+
+ //we need ddx/ddy for mipmaps, so simulate them
+ vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz;
+ vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz;
- if (decals.data[decal_index].albedo_rect != vec4(0.0)) {
- //has albedo
- vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw);
- decal_albedo *= decals.data[decal_index].modulate;
- decal_albedo.a *= fade;
- albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix);
-
- if (decals.data[decal_index].normal_rect != vec4(0.0)) {
- vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz;
- decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software
- decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy)));
- //convert to view space, use xzy because y is up
- decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz;
-
- normal = normalize(mix(normal, decal_normal, decal_albedo.a));
+ float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade);
+
+ if (decals.data[decal_index].normal_fade > 0.0) {
+ fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5);
}
- if (decals.data[decal_index].orm_rect != vec4(0.0)) {
- vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz;
- ao = mix(ao, decal_orm.r, decal_albedo.a);
- roughness = mix(roughness, decal_orm.g, decal_albedo.a);
- metallic = mix(metallic, decal_orm.b, decal_albedo.a);
+ if (decals.data[decal_index].albedo_rect != vec4(0.0)) {
+ //has albedo
+ vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw);
+ decal_albedo *= decals.data[decal_index].modulate;
+ decal_albedo.a *= fade;
+ albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix);
+
+ if (decals.data[decal_index].normal_rect != vec4(0.0)) {
+ vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz;
+ decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software
+ decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy)));
+ //convert to view space, use xzy because y is up
+ decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz;
+
+ normal = normalize(mix(normal, decal_normal, decal_albedo.a));
+ }
+
+ if (decals.data[decal_index].orm_rect != vec4(0.0)) {
+ vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz;
+ ao = mix(ao, decal_orm.r, decal_albedo.a);
+ roughness = mix(roughness, decal_orm.g, decal_albedo.a);
+ metallic = mix(metallic, decal_orm.b, decal_albedo.a);
+ }
}
- }
- if (decals.data[decal_index].emission_rect != vec4(0.0)) {
- //emission is additive, so its independent from albedo
- emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade;
+ if (decals.data[decal_index].emission_rect != vec4(0.0)) {
+ //emission is additive, so its independent from albedo
+ emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade;
+ }
}
}
}
@@ -2348,12 +2415,45 @@ FRAGMENT_SHADER_CODE
vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0);
vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0);
- uint reflection_probe_count = cluster_cell.z >> CLUSTER_COUNTER_SHIFT;
- uint reflection_probe_pointer = cluster_cell.z & CLUSTER_POINTER_MASK;
+ uint cluster_reflection_offset = cluster_offset + scene_data.cluster_type_size * 3;
+
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
+
+ cluster_get_item_range(cluster_reflection_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
+
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
+
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_reflection_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- for (uint i = 0; i < reflection_probe_count; i++) {
- uint ref_index = cluster_data.indices[reflection_probe_pointer + i];
- reflection_process(ref_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum);
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint reflection_index = 32 * i + bit;
+
+ if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
+
+ reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum);
+ }
}
if (reflection_accum.a > 0.0) {
@@ -2800,7 +2900,9 @@ FRAGMENT_SHADER_CODE
shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0;
}
- light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms,
+ // blur_shadow() receives `shadow` by value and returns the processed value,
+ // so the result has to be assigned back for the call to have any effect.
+ shadow = blur_shadow(shadow);
+
+ light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@@ -2833,154 +2935,146 @@ FRAGMENT_SHADER_CODE
{ //omni lights
- uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT;
- uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK;
+ uint cluster_omni_offset = cluster_offset;
- // Do shadow and lighting in two passes to reduce register pressure
- uint shadow0 = 0;
- uint shadow1 = 0;
- uint shadow2 = 0;
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
- for (uint i = 0; i < 18; i++) {
- if (i >= omni_light_count) {
- break;
- }
- uint light_index = cluster_data.indices[omni_light_pointer + i];
+ cluster_get_item_range(cluster_omni_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
- if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
- continue; //not masked
- }
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- float s = light_process_omni_shadow(light_index, vertex, view);
- if (i < 6) {
- shadow0 |= uint(clamp(s * 31.0, 0.0, 31.0)) << (i * 5);
- } else if (i < 12) {
- shadow1 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 6) * 5);
- } else {
- shadow2 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 12) * 5);
- }
- }
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_omni_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- for (uint i = 0; i < 18; i++) {
- if (i == omni_light_count) {
- break;
- }
- uint light_index = cluster_data.indices[omni_light_pointer + i];
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint light_index = 32 * i + bit;
- if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
- continue; //not masked
- }
+ if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
- float shadow;
- if (i < 6) {
- shadow = float(shadow0 >> (i * 5) & 0x1F) / 31.0;
- } else if (i < 12) {
- shadow = float(shadow1 >> ((i - 6) * 5) & 0x1F) / 31.0;
- } else {
- shadow = float(shadow1 >> ((i - 12) * 5) & 0x1F) / 31.0;
- }
+ float shadow = light_process_omni_shadow(light_index, vertex, view);
+
+ shadow = blur_shadow(shadow);
- light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
+ light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
#ifdef LIGHT_BACKLIGHT_USED
- backlight,
+ backlight,
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
- transmittance_color,
- transmittance_depth,
- transmittance_curve,
- transmittance_boost,
+ transmittance_color,
+ transmittance_depth,
+ transmittance_curve,
+ transmittance_boost,
#endif
#ifdef LIGHT_RIM_USED
- rim,
- rim_tint,
- albedo,
+ rim,
+ rim_tint,
+ albedo,
#endif
#ifdef LIGHT_CLEARCOAT_USED
- clearcoat, clearcoat_gloss,
+ clearcoat, clearcoat_gloss,
#endif
#ifdef LIGHT_ANISOTROPY_USED
- tangent, binormal, anisotropy,
+ tangent, binormal, anisotropy,
#endif
#ifdef USE_SHADOW_TO_OPACITY
- alpha,
+ alpha,
#endif
- diffuse_light, specular_light);
+ diffuse_light, specular_light);
+ }
}
}
{ //spot lights
- uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT;
- uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK;
- // Do shadow and lighting in two passes to reduce register pressure
- uint shadow0 = 0;
- uint shadow1 = 0;
- uint shadow2 = 0;
+ uint cluster_spot_offset = cluster_offset + scene_data.cluster_type_size;
- for (uint i = 0; i < 18; i++) {
- if (i >= spot_light_count) {
- break;
- }
- uint light_index = cluster_data.indices[spot_light_pointer + i];
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
- if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
- continue; //not masked
- }
+ cluster_get_item_range(cluster_spot_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
- float s = light_process_spot_shadow(light_index, vertex, view);
- if (i < 6) {
- shadow0 |= uint(clamp(s * 31.0, 0.0, 31.0)) << (i * 5);
- } else if (i < 12) {
- shadow1 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 6) * 5);
- } else {
- shadow2 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 12) * 5);
- }
- }
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- for (uint i = 0; i < 18; i++) {
- if (i == spot_light_count) {
- break;
- }
- uint light_index = cluster_data.indices[spot_light_pointer + i];
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_spot_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
- continue; //not masked
- }
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
- float shadow;
- if (i < 6) {
- shadow = float(shadow0 >> (i * 5) & 0x1F) / 31.0;
- } else if (i < 12) {
- shadow = float(shadow1 >> ((i - 6) * 5) & 0x1F) / 31.0;
- } else {
- shadow = float(shadow1 >> ((i - 12) * 5) & 0x1F) / 31.0;
- }
+ uint light_index = 32 * i + bit;
- light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
+ if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
+
+ float shadow = light_process_spot_shadow(light_index, vertex, view);
+
+ shadow = blur_shadow(shadow);
+
+ light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
#ifdef LIGHT_BACKLIGHT_USED
- backlight,
+ backlight,
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
- transmittance_color,
- transmittance_depth,
- transmittance_curve,
- transmittance_boost,
+ transmittance_color,
+ transmittance_depth,
+ transmittance_curve,
+ transmittance_boost,
#endif
#ifdef LIGHT_RIM_USED
- rim,
- rim_tint,
- albedo,
+ rim,
+ rim_tint,
+ albedo,
#endif
#ifdef LIGHT_CLEARCOAT_USED
- clearcoat, clearcoat_gloss,
+ clearcoat, clearcoat_gloss,
#endif
#ifdef LIGHT_ANISOTROPY_USED
- tangent, binormal, anisotropy,
+ tangent, binormal, anisotropy,
#endif
#ifdef USE_SHADOW_TO_OPACITY
- alpha,
+ alpha,
#endif
- diffuse_light, specular_light);
+ diffuse_light, specular_light);
+ }
}
}
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl
index 87ce74ba88..a37e32e1fc 100644
--- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl
@@ -3,6 +3,15 @@
#define MAX_GI_PROBES 8
+#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic)
+
+#extension GL_KHR_shader_subgroup_ballot : enable
+#extension GL_KHR_shader_subgroup_arithmetic : enable
+
+#define USE_SUBGROUPS
+
+#endif
+
#include "cluster_data_inc.glsl"
#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED)
@@ -52,6 +61,11 @@ layout(set = 0, binding = 3, std140) uniform SceneData {
vec2 viewport_size;
vec2 screen_pixel_size;
+ uint cluster_shift;
+ uint cluster_width;
+ uint cluster_type_size;
+ uint max_cluster_element_count_div_32;
+
//use vec4s because std140 doesn't play nice with vec2s, z and w are wasted
vec4 directional_penumbra_shadow_kernel[32];
vec4 directional_soft_shadow_kernel[32];
@@ -139,17 +153,22 @@ scene_data;
#define INSTANCE_FLAGS_SKELETON (1 << 19)
#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20)
-layout(set = 0, binding = 5, std430) restrict readonly buffer Lights {
+layout(set = 0, binding = 5, std430) restrict readonly buffer OmniLights {
+ LightData data[];
+}
+omni_lights;
+
+layout(set = 0, binding = 6, std430) restrict readonly buffer SpotLights {
LightData data[];
}
-lights;
+spot_lights;
-layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData {
+layout(set = 0, binding = 7) buffer restrict readonly ReflectionProbeData {
ReflectionData data[];
}
reflections;
-layout(set = 0, binding = 7, std140) uniform DirectionalLights {
+layout(set = 0, binding = 8, std140) uniform DirectionalLights {
DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS];
}
directional_lights;
@@ -183,16 +202,9 @@ layout(set = 0, binding = 14, std430) restrict readonly buffer Decals {
}
decals;
-layout(set = 0, binding = 15) uniform utexture3D cluster_texture;
-
-layout(set = 0, binding = 16, std430) restrict readonly buffer ClusterData {
- uint indices[];
-}
-cluster_data;
+layout(set = 0, binding = 15) uniform texture2D directional_shadow_atlas;
-layout(set = 0, binding = 17) uniform texture2D directional_shadow_atlas;
-
-layout(set = 0, binding = 18, std430) restrict readonly buffer GlobalVariableData {
+layout(set = 0, binding = 16, std430) restrict readonly buffer GlobalVariableData {
vec4 data[];
}
global_variables;
@@ -206,7 +218,7 @@ struct SDFGIProbeCascadeData {
float to_cell; // 1/bounds * grid_size
};
-layout(set = 0, binding = 19, std140) uniform SDFGI {
+layout(set = 0, binding = 17, std140) uniform SDFGI {
vec3 grid_size;
uint max_cascades;
@@ -262,14 +274,19 @@ layout(set = 1, binding = 3) uniform texture2DArray lightmap_textures[MAX_LIGHTM
layout(set = 1, binding = 4) uniform texture3D gi_probe_textures[MAX_GI_PROBES];
#endif
+layout(set = 1, binding = 5, std430) buffer restrict readonly ClusterBuffer {
+ uint data[];
+}
+cluster_buffer;
+
/* Set 3, Render Buffers */
#ifdef MODE_RENDER_SDF
-layout(r16ui, set = 1, binding = 5) uniform restrict writeonly uimage3D albedo_volume_grid;
-layout(r32ui, set = 1, binding = 6) uniform restrict writeonly uimage3D emission_grid;
-layout(r32ui, set = 1, binding = 7) uniform restrict writeonly uimage3D emission_aniso_grid;
-layout(r32ui, set = 1, binding = 8) uniform restrict uimage3D geom_facing_grid;
+layout(r16ui, set = 1, binding = 6) uniform restrict writeonly uimage3D albedo_volume_grid;
+layout(r32ui, set = 1, binding = 7) uniform restrict writeonly uimage3D emission_grid;
+layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_aniso_grid;
+layout(r32ui, set = 1, binding = 9) uniform restrict uimage3D geom_facing_grid;
//still need to be present for shaders that use it, so remap them to something
#define depth_buffer shadow_atlas
@@ -278,17 +295,17 @@ layout(r32ui, set = 1, binding = 8) uniform restrict uimage3D geom_facing_grid;
#else
-layout(set = 1, binding = 5) uniform texture2D depth_buffer;
-layout(set = 1, binding = 6) uniform texture2D color_buffer;
+layout(set = 1, binding = 6) uniform texture2D depth_buffer;
+layout(set = 1, binding = 7) uniform texture2D color_buffer;
#ifndef LOW_END_MODE
-layout(set = 1, binding = 7) uniform texture2D normal_roughness_buffer;
-layout(set = 1, binding = 8) uniform texture2D ao_buffer;
-layout(set = 1, binding = 9) uniform texture2D ambient_buffer;
-layout(set = 1, binding = 10) uniform texture2D reflection_buffer;
-layout(set = 1, binding = 11) uniform texture2DArray sdfgi_lightprobe_texture;
-layout(set = 1, binding = 12) uniform texture3D sdfgi_occlusion_cascades;
+layout(set = 1, binding = 8) uniform texture2D normal_roughness_buffer;
+layout(set = 1, binding = 9) uniform texture2D ao_buffer;
+layout(set = 1, binding = 10) uniform texture2D ambient_buffer;
+layout(set = 1, binding = 11) uniform texture2D reflection_buffer;
+layout(set = 1, binding = 12) uniform texture2DArray sdfgi_lightprobe_texture;
+layout(set = 1, binding = 13) uniform texture3D sdfgi_occlusion_cascades;
struct GIProbeData {
mat4 xform;
@@ -306,12 +323,12 @@ struct GIProbeData {
uint mipmaps;
};
-layout(set = 1, binding = 13, std140) uniform GIProbes {
+layout(set = 1, binding = 14, std140) uniform GIProbes {
GIProbeData data[MAX_GI_PROBES];
}
gi_probes;
-layout(set = 1, binding = 14) uniform texture3D volumetric_fog_texture;
+layout(set = 1, binding = 15) uniform texture3D volumetric_fog_texture;
#endif // LOW_END_MODE
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
index 30dbf5871f..ed0a8a4b86 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
@@ -143,10 +143,78 @@ void main() {
uint voxel_albedo = process_voxels.data[voxel_index].albedo;
vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F);
- vec3 light_accum[6];
-
+ vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0));
uint valid_aniso = (voxel_albedo >> 15) & 0x3F;
+ const vec3 aniso_dir[6] = vec3[](
+ vec3(1, 0, 0),
+ vec3(0, 1, 0),
+ vec3(0, 0, 1),
+ vec3(-1, 0, 0),
+ vec3(0, -1, 0),
+ vec3(0, 0, -1));
+
+ // Add indirect light first, in order to save computation resources
+#ifdef MODE_PROCESS_DYNAMIC
+ if (params.multibounce) {
+ vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size;
+ ivec3 probe_base_pos = ivec3(pos);
+
+ float weight_accum[6] = float[](0, 0, 0, 0, 0, 0);
+
+ ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade));
+ tex_pos.x += probe_base_pos.z * int(params.probe_axis_size);
+
+ tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1);
+
+ vec3 base_tex_posf = vec3(tex_pos);
+ vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size));
+ vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx;
+
+ for (uint j = 0; j < 8; j++) {
+ ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1);
+ ivec3 probe_posi = probe_base_pos;
+ probe_posi += offset;
+
+ // Compute weight
+
+ vec3 probe_pos = vec3(probe_posi);
+ vec3 probe_to_pos = pos - probe_pos;
+ vec3 probe_dir = normalize(-probe_to_pos);
+
+ // Compute lightprobe texture position
+
+ vec3 trilinear = vec3(1.0) - abs(probe_to_pos);
+
+ for (uint k = 0; k < 6; k++) {
+ if (bool(valid_aniso & (1 << k))) {
+ vec3 n = aniso_dir[k];
+ float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir));
+
+ vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0);
+ tex_posf.xy *= tex_pixel_size;
+
+ vec3 pos_uvw = tex_posf;
+ pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy;
+ pos_uvw.x += float(offset.z) * probe_uv_offset.z;
+ vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb;
+
+ light_accum[k] += indirect_light * weight;
+ weight_accum[k] += weight;
+ }
+ }
+ }
+
+ for (uint k = 0; k < 6; k++) {
+ if (weight_accum[k] > 0.0) {
+ light_accum[k] /= weight_accum[k];
+ light_accum[k] *= albedo;
+ }
+ }
+ }
+
+#endif
+
{
uint rgbe = process_voxels.data[voxel_index].light;
@@ -162,18 +230,10 @@ void main() {
uint aniso = process_voxels.data[voxel_index].light_aniso;
for (uint i = 0; i < 6; i++) {
float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F);
- light_accum[i] = l * strength;
+ light_accum[i] += l * strength;
}
}
- const vec3 aniso_dir[6] = vec3[](
- vec3(1, 0, 0),
- vec3(0, 1, 0),
- vec3(0, 0, 1),
- vec3(-1, 0, 0),
- vec3(0, -1, 0),
- vec3(0, 0, -1));
-
// Raytrace light
vec3 pos_to_uvw = 1.0 / params.grid_size;
@@ -292,65 +352,6 @@ void main() {
}
}
- // Add indirect light
-
- if (params.multibounce) {
- vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size;
- ivec3 probe_base_pos = ivec3(pos);
-
- vec4 probe_accum[6] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
- float weight_accum[6] = float[](0, 0, 0, 0, 0, 0);
-
- ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade));
- tex_pos.x += probe_base_pos.z * int(params.probe_axis_size);
-
- tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1);
-
- vec3 base_tex_posf = vec3(tex_pos);
- vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size));
- vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx;
-
- for (uint j = 0; j < 8; j++) {
- ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1);
- ivec3 probe_posi = probe_base_pos;
- probe_posi += offset;
-
- // Compute weight
-
- vec3 probe_pos = vec3(probe_posi);
- vec3 probe_to_pos = pos - probe_pos;
- vec3 probe_dir = normalize(-probe_to_pos);
-
- // Compute lightprobe texture position
-
- vec3 trilinear = vec3(1.0) - abs(probe_to_pos);
-
- for (uint k = 0; k < 6; k++) {
- if (bool(valid_aniso & (1 << k))) {
- vec3 n = aniso_dir[k];
- float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir));
-
- vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0);
- tex_posf.xy *= tex_pixel_size;
-
- vec3 pos_uvw = tex_posf;
- pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy;
- pos_uvw.x += float(offset.z) * probe_uv_offset.z;
- vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0);
-
- probe_accum[k] += indirect_light * weight;
- weight_accum[k] += weight;
- }
- }
- }
-
- for (uint k = 0; k < 6; k++) {
- if (weight_accum[k] > 0.0) {
- light_accum[k] += probe_accum[k].rgb * albedo / weight_accum[k];
- }
- }
- }
-
// Store the light in the light texture
float lumas[6];
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
index d516ab22c3..67630a3aa1 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
@@ -136,12 +136,24 @@ uint rgbe_encode(vec3 color) {
return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27);
}
+struct SH { // Per-invocation spherical-harmonic accumulator, stored as a flat float array (3 floats per coefficient: r, g, b).
+#if (SH_SIZE == 16)
+ float c[48]; // 16 coefficients * 3 channels
+#else
+ float c[28]; // NOTE(review): presumably SH_SIZE is 9 here, which needs 27 floats; confirm why 28 is used
+#endif
+};
+
+shared SH sh_accum[64]; //8x8
+
void main() {
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing
return;
}
+ uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8;
+
#ifdef MODE_PROCESS
float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell;
@@ -154,27 +166,9 @@ void main() {
vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size;
vec3 pos_to_uvw = 1.0 / params.grid_size;
- vec4 probe_sh_accum[SH_SIZE] = vec4[](
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0)
-#if (SH_SIZE == 16)
- ,
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0)
-#endif
- );
+ for (uint i = 0; i < SH_SIZE * 3; i++) {
+ sh_accum[probe_index].c[i] = 0.0;
+ }
// quickly ensure each probe has a different "offset" for the vogel function, based on integer world position
uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell));
@@ -278,33 +272,33 @@ void main() {
}
vec3 ray_dir2 = ray_dir * ray_dir;
- float c[SH_SIZE] = float[](
-
- 0.282095, //l0
- 0.488603 * ray_dir.y, //l1n1
- 0.488603 * ray_dir.z, //l1n0
- 0.488603 * ray_dir.x, //l1p1
- 1.092548 * ray_dir.x * ray_dir.y, //l2n2
- 1.092548 * ray_dir.y * ray_dir.z, //l2n1
- 0.315392 * (3.0 * ray_dir2.z - 1.0), //l20
- 1.092548 * ray_dir.x * ray_dir.z, //l2p1
- 0.546274 * (ray_dir2.x - ray_dir2.y) //l2p2
+
+#define SH_ACCUM(m_idx, m_value) \
+ { \
+ vec3 l = light.rgb * (m_value); \
+ sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \
+ sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \
+ sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \
+ }
+ SH_ACCUM(0, 0.282095); //l0
+ SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1
+ SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0
+ SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1
+ SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2
+ SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1
+ SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20
+ SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1
+ SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2
#if (SH_SIZE == 16)
- ,
- 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y),
- 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z,
- 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z),
- 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z),
- 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z),
- 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z,
- 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)
+ SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y));
+ SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z);
+ SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z));
+ SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z));
+ SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z));
+ SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z);
+ SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y));
#endif
- );
-
- for (uint j = 0; j < SH_SIZE; j++) {
- probe_sh_accum[j] += light * c[j];
- }
}
for (uint i = 0; i < SH_SIZE; i++) {
@@ -312,7 +306,7 @@ void main() {
ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index));
ivec2 average_pos = prev_pos.xy;
- vec4 value = probe_sh_accum[i] * 4.0 / float(params.ray_count);
+ vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count);
ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average
@@ -344,37 +338,11 @@ void main() {
ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1);
ivec2 local_pos = pos % OCT_SIZE;
- //fill the spherical harmonic
- vec4 sh[SH_SIZE];
-
- for (uint i = 0; i < SH_SIZE; i++) {
- // store in history texture
- ivec2 average_pos = sh_pos + ivec2(0, i);
- ivec4 average = imageLoad(lightprobe_average_texture, average_pos);
-
- sh[i] = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS);
- }
-
//compute the octahedral normal for this texel
vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE));
- /*
+
// read the spherical harmonic
- const float c1 = 0.429043;
- const float c2 = 0.511664;
- const float c3 = 0.743125;
- const float c4 = 0.886227;
- const float c5 = 0.247708;
- vec4 light = (c1 * sh[8] * (normal.x * normal.x - normal.y * normal.y) +
- c3 * sh[6] * normal.z * normal.z +
- c4 * sh[0] -
- c5 * sh[6] +
- 2.0 * c1 * sh[4] * normal.x * normal.y +
- 2.0 * c1 * sh[7] * normal.x * normal.z +
- 2.0 * c1 * sh[5] * normal.y * normal.z +
- 2.0 * c2 * sh[3] * normal.x +
- 2.0 * c2 * sh[1] * normal.y +
- 2.0 * c2 * sh[2] * normal.z);
-*/
+
vec3 normal2 = normal * normal;
float c[SH_SIZE] = float[](
@@ -426,7 +394,14 @@ void main() {
vec3 radiance = vec3(0.0);
for (uint i = 0; i < SH_SIZE; i++) {
- vec3 m = sh[i].rgb * c[i] * 4.0;
+ // store in history texture
+ ivec2 average_pos = sh_pos + ivec2(0, i);
+ ivec4 average = imageLoad(lightprobe_average_texture, average_pos);
+
+ vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS);
+
+ vec3 m = sh.rgb * c[i] * 4.0;
+
irradiance += m * l_mult[i];
radiance += m;
}
diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
index 6215e721ce..aa32809a06 100644
--- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
+++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
@@ -4,6 +4,15 @@
VERSION_DEFINES
+/* Do not use subgroups here, seems there is not much advantage and causes glitches
+#extension GL_KHR_shader_subgroup_ballot: enable
+#extension GL_KHR_shader_subgroup_arithmetic: enable
+
+#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic)
+#define USE_SUBGROUPS
+#endif
+*/
+
#if defined(MODE_FOG) || defined(MODE_FILTER)
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
@@ -23,22 +32,25 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
layout(set = 0, binding = 1) uniform texture2D shadow_atlas;
layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas;
-layout(set = 0, binding = 3, std430) restrict readonly buffer Lights {
+layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights {
LightData data[];
}
-lights;
+omni_lights;
-layout(set = 0, binding = 4, std140) uniform DirectionalLights {
+layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights {
+ LightData data[];
+}
+spot_lights;
+
+layout(set = 0, binding = 5, std140) uniform DirectionalLights {
DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS];
}
directional_lights;
-layout(set = 0, binding = 5) uniform utexture3D cluster_texture;
-
-layout(set = 0, binding = 6, std430) restrict readonly buffer ClusterData {
- uint indices[];
+layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer {
+ uint data[];
}
-cluster_data;
+cluster_buffer;
layout(set = 0, binding = 7) uniform sampler linear_sampler;
@@ -132,7 +144,7 @@ layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture;
#endif //SDFGI
-layout(push_constant, binding = 0, std430) uniform Params {
+layout(set = 0, binding = 14, std140) uniform Params {
vec2 fog_frustum_size_begin;
vec2 fog_frustum_size_end;
@@ -150,7 +162,14 @@ layout(push_constant, binding = 0, std430) uniform Params {
float detail_spread;
float gi_inject;
uint max_gi_probes;
- uint pad;
+ uint cluster_type_size;
+
+ vec2 screen_size;
+ uint cluster_shift;
+ uint cluster_width;
+
+ uvec3 cluster_pad;
+ uint max_cluster_element_count_div_32;
mat3x4 cam_rotation;
}
@@ -178,6 +197,22 @@ float get_omni_attenuation(float distance, float inv_range, float decay) {
return nd * pow(max(distance, 0.0001), -decay);
}
+void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { // Unpacks the min/max item range stored at cluster_buffer.data[p_offset] and derives the span of 32-bit mask words covering it.
+ uint item_min_max = cluster_buffer.data[p_offset]; // one uint holds both bounds
+ item_min = item_min_max & 0xFFFF; // low 16 bits: first item index
+ item_max = item_min_max >> 16; // high 16 bits: upper bound (0 means no elements, see below)
+ ; // stray empty statement, has no effect
+
+ item_from = item_min >> 5; // index of the 32-bit word containing item_min (item_min / 32)
+ item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements
+}
+
+uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { // Returns a bit mask for 32-bit word number i, used by callers to AND away bits outside the z_min..z_max item range.
+ int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); // bit offset of z_min inside word i, clamped to a valid bit position
+ int mask_width = min(int(z_max) - int(z_min), 32 - local_min); // range length, capped to the bits left in this word. NOTE(review): not reduced by bits already covered in earlier words, so later words may get a wider mask than the range; confirm intended
+ return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); // sets mask_width bits starting at bit local_min
+}
+
void main() {
vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size);
@@ -193,6 +228,12 @@ void main() {
//posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0;
vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels
+
+ uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size);
+ uvec2 cluster_pos = screen_pos >> params.cluster_shift;
+ uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32);
+ //positions in screen are too spread apart, no hopes for optimizing with subgroups
+
fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread);
vec3 view_pos;
@@ -200,6 +241,8 @@ void main() {
view_pos.z = -params.fog_frustum_end * fog_unit_pos.z;
view_pos.y = -view_pos.y;
+ uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0));
+
vec3 total_light = params.light_color;
float total_density = params.base_density;
@@ -266,95 +309,160 @@ void main() {
//compute lights from cluster
- vec3 cluster_pos;
- cluster_pos.xy = fog_unit_pos.xy;
- cluster_pos.z = clamp((abs(view_pos.z) - params.z_near) / (params.z_far - params.z_near), 0.0, 1.0);
+ { //omni lights
+
+ uint cluster_omni_offset = cluster_offset;
+
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
+
+ cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
+
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- uvec4 cluster_cell = texture(usampler3D(cluster_texture, linear_sampler), cluster_pos);
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_omni_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT;
- uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK;
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint light_index = 32 * i + bit;
- for (uint i = 0; i < omni_light_count; i++) {
- uint light_index = cluster_data.indices[omni_light_pointer + i];
+ //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) {
+ // continue; //not masked
+ //}
- vec3 light_pos = lights.data[i].position;
- float d = distance(lights.data[i].position, view_pos);
- float shadow_attenuation = 1.0;
+ vec3 light_pos = omni_lights.data[light_index].position;
+ float d = distance(omni_lights.data[light_index].position, view_pos);
+ float shadow_attenuation = 1.0;
- if (d * lights.data[i].inv_radius < 1.0) {
- float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation);
+ if (d * omni_lights.data[light_index].inv_radius < 1.0) {
+ float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation);
- vec3 light = lights.data[i].color / M_PI;
+ vec3 light = omni_lights.data[light_index].color / M_PI;
- if (lights.data[i].shadow_enabled) {
- //has shadow
- vec4 v = vec4(view_pos, 1.0);
+ if (omni_lights.data[light_index].shadow_enabled) {
+ //has shadow
+ vec4 v = vec4(view_pos, 1.0);
- vec4 splane = (lights.data[i].shadow_matrix * v);
- float shadow_len = length(splane.xyz); //need to remember shadow len from here
+ vec4 splane = (omni_lights.data[light_index].shadow_matrix * v);
+ float shadow_len = length(splane.xyz); //need to remember shadow len from here
- splane.xyz = normalize(splane.xyz);
- vec4 clamp_rect = lights.data[i].atlas_rect;
+ splane.xyz = normalize(splane.xyz);
+ vec4 clamp_rect = omni_lights.data[light_index].atlas_rect;
- if (splane.z >= 0.0) {
- splane.z += 1.0;
+ if (splane.z >= 0.0) {
+ splane.z += 1.0;
- clamp_rect.y += clamp_rect.w;
+ clamp_rect.y += clamp_rect.w;
- } else {
- splane.z = 1.0 - splane.z;
- }
+ } else {
+ splane.z = 1.0 - splane.z;
+ }
- splane.xy /= splane.z;
+ splane.xy /= splane.z;
- splane.xy = splane.xy * 0.5 + 0.5;
- splane.z = shadow_len * lights.data[i].inv_radius;
- splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
- splane.w = 1.0; //needed? i think it should be 1 already
+ splane.xy = splane.xy * 0.5 + 0.5;
+ splane.z = shadow_len * omni_lights.data[light_index].inv_radius;
+ splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
+ splane.w = 1.0; //needed? i think it should be 1 already
- float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
+ float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
- shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
+ shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade);
+ }
+ total_light += light * attenuation * shadow_attenuation;
+ }
}
- total_light += light * attenuation * shadow_attenuation;
}
}
- uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT;
- uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK;
+ { //spot lights
+
+ uint cluster_spot_offset = cluster_offset + params.cluster_type_size;
+
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
+
+ cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
+
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
+
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_spot_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- for (uint i = 0; i < spot_light_count; i++) {
- uint light_index = cluster_data.indices[spot_light_pointer + i];
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
- vec3 light_pos = lights.data[i].position;
- vec3 light_rel_vec = lights.data[i].position - view_pos;
- float d = length(light_rel_vec);
- float shadow_attenuation = 1.0;
+ //if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) {
+ // continue; //not masked
+ //}
- if (d * lights.data[i].inv_radius < 1.0) {
- float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation);
+ uint light_index = 32 * i + bit; // bit position within mask word i of the spot cluster, so this indexes the spot light buffer
- vec3 spot_dir = lights.data[i].direction;
- float scos = max(dot(-normalize(light_rel_vec), spot_dir), lights.data[i].cone_angle);
- float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - lights.data[i].cone_angle));
- attenuation *= 1.0 - pow(spot_rim, lights.data[i].cone_attenuation);
+ vec3 light_pos = spot_lights.data[light_index].position;
+ vec3 light_rel_vec = spot_lights.data[light_index].position - view_pos;
+ float d = length(light_rel_vec);
+ float shadow_attenuation = 1.0;
- vec3 light = lights.data[i].color / M_PI;
+ if (d * spot_lights.data[light_index].inv_radius < 1.0) { // only light the fog cell when it is inside the light's range
+ float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation);
- if (lights.data[i].shadow_enabled) {
- //has shadow
- vec4 v = vec4(view_pos, 1.0);
+ vec3 spot_dir = spot_lights.data[light_index].direction;
+ float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[light_index].cone_angle);
+ float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[light_index].cone_angle));
+ attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation); // fade towards the edge of the cone
- vec4 splane = (lights.data[i].shadow_matrix * v);
- splane /= splane.w;
+ vec3 light = spot_lights.data[light_index].color / M_PI;
- float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
+ if (spot_lights.data[light_index].shadow_enabled) {
+ //has shadow
+ vec4 v = vec4(view_pos, 1.0);
- shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
- }
+ vec4 splane = (spot_lights.data[light_index].shadow_matrix * v);
+ splane /= splane.w;
- total_light += light * attenuation * shadow_attenuation;
+ float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
+
+ shadow_attenuation = exp(min(0.0, (depth - splane.z)) / spot_lights.data[light_index].inv_radius * spot_lights.data[light_index].shadow_volumetric_fog_fade);
+ }
+
+ total_light += light * attenuation * shadow_attenuation;
+ }
+ }
}
}
diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp
index d3979521b1..e1f179aa3b 100644
--- a/servers/rendering/renderer_scene_cull.cpp
+++ b/servers/rendering/renderer_scene_cull.cpp
@@ -436,7 +436,7 @@ void RendererSceneCull::instance_set_base(RID p_instance, RID p_base) {
case RS::INSTANCE_LIGHT: {
InstanceLightData *light = static_cast<InstanceLightData *>(instance->base_data);
- if (scenario && RSG::storage->light_get_type(instance->base) != RS::LIGHT_DIRECTIONAL && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
+ if (scenario && instance->visible && RSG::storage->light_get_type(instance->base) != RS::LIGHT_DIRECTIONAL && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
scenario->dynamic_lights.erase(light->instance);
}
@@ -783,6 +783,17 @@ void RendererSceneCull::instance_set_visible(RID p_instance, bool p_visible) {
_unpair_instance(instance);
}
+ if (instance->base_type == RS::INSTANCE_LIGHT) { // keep the scenario's dynamic_lights list in sync with the light's visibility
+ InstanceLightData *light = static_cast<InstanceLightData *>(instance->base_data);
+ if (instance->scenario && RSG::storage->light_get_type(instance->base) != RS::LIGHT_DIRECTIONAL && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) { // only non-directional lights with dynamic bake mode are tracked
+ if (p_visible) {
+ instance->scenario->dynamic_lights.push_back(light->instance); // NOTE(review): assumes this code only runs when visibility actually changes, otherwise the light would be added twice - confirm
+ } else {
+ instance->scenario->dynamic_lights.erase(light->instance);
+ }
+ }
+ }
+
if (instance->base_type == RS::INSTANCE_PARTICLES_COLLISION) {
InstanceParticlesCollisionData *collision = static_cast<InstanceParticlesCollisionData *>(instance->base_data);
RSG::storage->particles_collision_instance_set_active(collision->instance, p_visible);
@@ -1150,13 +1161,13 @@ void RendererSceneCull::_update_instance(Instance *p_instance) {
RS::LightBakeMode bake_mode = RSG::storage->light_get_bake_mode(p_instance->base);
if (RSG::storage->light_get_type(p_instance->base) != RS::LIGHT_DIRECTIONAL && bake_mode != light->bake_mode) {
- if (p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
+ if (p_instance->visible && p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
p_instance->scenario->dynamic_lights.erase(light->instance);
}
light->bake_mode = bake_mode;
- if (p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
+ if (p_instance->visible && p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
p_instance->scenario->dynamic_lights.push_back(light->instance);
}
}
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index 73c86a0a1d..9ae3e5819e 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -262,10 +262,10 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "sync_with_draw"), &RenderingDevice::texture_resolve_multisample, DEFVAL(false));
ClassDB::bind_method(D_METHOD("framebuffer_format_create", "attachments"), &RenderingDevice::_framebuffer_format_create);
- ClassDB::bind_method(D_METHOD("framebuffer_format_create_empty", "size"), &RenderingDevice::framebuffer_format_create_empty);
+ ClassDB::bind_method(D_METHOD("framebuffer_format_create_empty", "samples"), &RenderingDevice::framebuffer_format_create_empty, DEFVAL(TEXTURE_SAMPLES_1));
ClassDB::bind_method(D_METHOD("framebuffer_format_get_texture_samples", "format"), &RenderingDevice::framebuffer_format_get_texture_samples);
ClassDB::bind_method(D_METHOD("framebuffer_create", "textures", "validate_with_format"), &RenderingDevice::_framebuffer_create, DEFVAL(INVALID_FORMAT_ID));
- ClassDB::bind_method(D_METHOD("framebuffer_create_empty", "size", "validate_with_format"), &RenderingDevice::framebuffer_create_empty, DEFVAL(INVALID_FORMAT_ID));
+ ClassDB::bind_method(D_METHOD("framebuffer_create_empty", "size", "samples", "validate_with_format"), &RenderingDevice::framebuffer_create_empty, DEFVAL(TEXTURE_SAMPLES_1), DEFVAL(INVALID_FORMAT_ID));
ClassDB::bind_method(D_METHOD("framebuffer_get_format", "framebuffer"), &RenderingDevice::framebuffer_get_format);
ClassDB::bind_method(D_METHOD("sampler_create", "state"), &RenderingDevice::_sampler_create);
@@ -288,6 +288,7 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("uniform_set_is_valid", "uniform_set"), &RenderingDevice::uniform_set_is_valid);
ClassDB::bind_method(D_METHOD("buffer_update", "buffer", "offset", "size_bytes", "data", "sync_with_draw"), &RenderingDevice::_buffer_update, DEFVAL(true));
+ ClassDB::bind_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes", "sync_with_draw"), &RenderingDevice::buffer_clear, DEFVAL(true)); // bound to buffer_clear itself: its four parameters match the four argument names listed here
ClassDB::bind_method(D_METHOD("buffer_get_data", "buffer"), &RenderingDevice::buffer_get_data);
ClassDB::bind_method(D_METHOD("render_pipeline_create", "shader", "framebuffer_format", "vertex_format", "primitive", "rasterization_state", "multisample_state", "stencil_state", "color_blend_state", "dynamic_state_flags"), &RenderingDevice::_render_pipeline_create, DEFVAL(0));
diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h
index 787805ea6a..f67e4dfbab 100644
--- a/servers/rendering/rendering_device.h
+++ b/servers/rendering/rendering_device.h
@@ -468,11 +468,11 @@ public:
// This ID is warranted to be unique for the same formats, does not need to be freed
virtual FramebufferFormatID framebuffer_format_create(const Vector<AttachmentFormat> &p_format) = 0;
- virtual FramebufferFormatID framebuffer_format_create_empty(const Size2i &p_size) = 0;
+ virtual FramebufferFormatID framebuffer_format_create_empty(TextureSamples p_samples = TEXTURE_SAMPLES_1) = 0;
virtual TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format) = 0;
virtual RID framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check = INVALID_ID) = 0;
- virtual RID framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check = INVALID_ID) = 0;
+ virtual RID framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples = TEXTURE_SAMPLES_1, FramebufferFormatID p_format_check = INVALID_ID) = 0;
virtual FramebufferFormatID framebuffer_get_format(RID p_framebuffer) = 0;
@@ -650,6 +650,7 @@ public:
virtual bool uniform_set_is_valid(RID p_uniform_set) = 0;
virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw = false) = 0; //this function can be used from any thread and it takes effect at the beginning of the frame, unless sync with draw is used, which is used to mix updates with draw calls
+ virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw = false) = 0;
virtual Vector<uint8_t> buffer_get_data(RID p_buffer) = 0; //this causes stall, only use to retrieve large buffers for saving
/*************************/