diff options
author | reduz <reduzio@gmail.com> | 2021-01-17 13:25:38 -0300 |
---|---|---|
committer | Juan Linietsky <reduzio@gmail.com> | 2021-01-19 23:31:06 +0100 |
commit | 099dee35f47db3e293cb8e60287ffe6a44f3d5d4 (patch) | |
tree | dea148899efa156adf4c7b9ff32464871cef4253 /servers/rendering/renderer_rd | |
parent | 7008e3c6eafa374e5d64ee7867608abe696698c2 (diff) |
Added GPU based cluster builder
Clustering is now GPU based, uses an implementation based on the Activision algorithm.
Diffstat (limited to 'servers/rendering/renderer_rd')
20 files changed, 2590 insertions, 1328 deletions
diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp new file mode 100644 index 0000000000..8d9cff0f43 --- /dev/null +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -0,0 +1,550 @@ +/*************************************************************************/ +/* cluster_builder_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "cluster_builder_rd.h" +#include "servers/rendering/rendering_device.h" +#include "servers/rendering/rendering_server_globals.h" + +ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() { + RD::VertexFormatID vertex_format; + + { + Vector<RD::VertexAttribute> attributes; + { + RD::VertexAttribute va; + va.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; + va.stride = sizeof(float) * 3; + attributes.push_back(va); + } + vertex_format = RD::get_singleton()->vertex_format_create(attributes); + } + + { + Vector<String> versions; + versions.push_back(""); + cluster_render.cluster_render_shader.initialize(versions); + cluster_render.shader_version = cluster_render.cluster_render_shader.version_create(); + cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0); + cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + RD::PipelineMultisampleState ms; + ms.sample_count = RD::TEXTURE_SAMPLES_4; + cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + } + { + Vector<String> versions; + versions.push_back(""); + cluster_store.cluster_store_shader.initialize(versions); + cluster_store.shader_version = cluster_store.cluster_store_shader.version_create(); + cluster_store.shader = cluster_store.cluster_store_shader.version_get_shader(cluster_store.shader_version, 0); + cluster_store.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_store.shader); + } + { + Vector<String> versions; + versions.push_back(""); + cluster_debug.cluster_debug_shader.initialize(versions); + cluster_debug.shader_version = cluster_debug.cluster_debug_shader.version_create(); + cluster_debug.shader = cluster_debug.cluster_debug_shader.version_get_shader(cluster_debug.shader_version, 0); + cluster_debug.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_debug.shader); + } + + { // SPHERE + static const uint32_t icosphere_vertex_count = 42; + static const float icosphere_vertices[icosphere_vertex_count * 3] = { + 0, 0, -1, 0.7236073, -0.5257253, -0.4472195, -0.276388, -0.8506492, -0.4472199, -0.8944262, 0, -0.4472156, -0.276388, 0.8506492, -0.4472199, 0.7236073, 0.5257253, -0.4472195, 0.276388, -0.8506492, 0.4472199, -0.7236073, -0.5257253, 0.4472195, -0.7236073, 0.5257253, 0.4472195, 0.276388, 0.8506492, 0.4472199, 0.8944262, 0, 0.4472156, 0, 0, 1, -0.1624555, -0.4999952, -0.8506544, 0.4253227, -0.3090114, -0.8506542, 0.2628688, -0.8090116, -0.5257377, 0.8506479, 0, -0.5257359, 0.4253227, 0.3090114, -0.8506542, -0.5257298, 0, -0.8506517, -0.6881894, -0.4999969, -0.5257362, -0.1624555, 0.4999952, -0.8506544, -0.6881894, 0.4999969, -0.5257362, 0.2628688, 0.8090116, -0.5257377, 0.9510579, -0.3090126, 0, 0.9510579, 0.3090126, 0, 0, -1, 0, 0.5877856, -0.8090167, 0, -0.9510579, -0.3090126, 0, -0.5877856, -0.8090167, 0, -0.5877856, 0.8090167, 0, -0.9510579, 0.3090126, 0, 0.5877856, 0.8090167, 0, 0, 1, 0, 0.6881894, -0.4999969, 0.5257362, -0.2628688, -0.8090116, 0.5257377, -0.8506479, 0, 0.5257359, -0.2628688, 0.8090116, 0.5257377, 0.6881894, 0.4999969, 0.5257362, 0.1624555, -0.4999952, 0.8506544, 0.5257298, 0, 0.8506517, -0.4253227, -0.3090114, 0.8506542, -0.4253227, 0.3090114, 0.8506542, 0.1624555, 0.4999952, 0.8506544 + }; + static const uint32_t icosphere_triangle_count = 80; + static const uint32_t icosphere_triangle_indices[icosphere_triangle_count * 3] = { + 0, 13, 12, 1, 13, 15, 0, 12, 17, 0, 17, 19, 0, 19, 16, 1, 15, 22, 2, 14, 24, 3, 18, 26, 4, 20, 28, 5, 21, 30, 1, 22, 25, 2, 24, 27, 3, 26, 29, 4, 28, 31, 5, 30, 23, 6, 32, 37, 7, 33, 39, 8, 34, 40, 9, 35, 41, 10, 36, 38, 38, 41, 11, 38, 36, 41, 36, 9, 41, 41, 40, 11, 41, 35, 40, 35, 8, 40, 40, 39, 11, 40, 34, 39, 34, 7, 39, 39, 37, 11, 39, 33, 37, 33, 6, 37, 37, 38, 11, 37, 32, 38, 32, 10, 38, 23, 36, 10, 23, 30, 36, 30, 9, 36, 31, 35, 9, 31, 28, 35, 28, 8, 35, 29, 34, 8, 29, 26, 34, 26, 7, 34, 27, 33, 7, 27, 24, 33, 24, 6, 33, 25, 32, 6, 25, 22, 32, 22, 10, 32, 30, 31, 9, 30, 21, 31, 21, 4, 31, 28, 29, 8, 28, 20, 29, 20, 3, 29, 26, 27, 7, 26, 18, 27, 18, 2, 27, 24, 25, 6, 24, 14, 25, 14, 1, 25, 22, 23, 10, 22, 15, 23, 15, 5, 23, 16, 21, 5, 16, 19, 21, 19, 4, 21, 19, 20, 4, 19, 17, 20, 17, 3, 20, 17, 18, 3, 17, 12, 18, 12, 2, 18, 15, 16, 5, 15, 13, 16, 13, 0, 16, 12, 14, 2, 12, 13, 14, 13, 1, 14 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * icosphere_vertex_count * 3); + copymem(vertex_data.ptrw(), icosphere_vertices, vertex_data.size()); + + sphere_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * icosphere_triangle_count * 3); + copymem(index_data.ptrw(), icosphere_triangle_indices, index_data.size()); + + sphere_index_buffer = RD::get_singleton()->index_buffer_create(icosphere_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(sphere_vertex_buffer); + + sphere_vertex_array = RD::get_singleton()->vertex_array_create(icosphere_vertex_count, vertex_format, buffers); + + sphere_index_array = RD::get_singleton()->index_array_create(sphere_index_buffer, 0, icosphere_triangle_count * 3); + + float min_d = 1e20; + for (uint32_t i = 0; i < icosphere_triangle_count; i++) { + Vector3 vertices[3]; + for (uint32_t j = 0; j < 3; j++) { + uint32_t index = icosphere_triangle_indices[i * 3 + j]; + for (uint32_t k = 0; k < 3; k++) { + vertices[j][k] = icosphere_vertices[index * 3 + k]; + } + } + Plane p(vertices[0], vertices[1], vertices[2]); + min_d = MIN(Math::abs(p.d), min_d); + } + sphere_overfit = 1.0 / min_d; + } + + { // CONE + static const uint32_t cone_vertex_count = 99; + static const float cone_vertices[cone_vertex_count * 3] = { + 0, 1, -1, 0.1950903, 0.9807853, -1, 0.3826835, 0.9238795, -1, 0.5555703, 0.8314696, -1, 0.7071068, 0.7071068, -1, 0.8314697, 0.5555702, -1, 0.9238795, 0.3826834, -1, 0.9807853, 0.1950903, -1, 1, 0, -1, 0.9807853, -0.1950902, -1, 0.9238796, -0.3826833, -1, 0.8314697, -0.5555702, -1, 0.7071068, -0.7071068, -1, 0.5555702, -0.8314697, -1, 0.3826833, -0.9238796, -1, 0.1950901, -0.9807853, -1, -3.25841e-7, -1, -1, -0.1950907, -0.9807852, -1, -0.3826839, -0.9238793, -1, -0.5555707, -0.8314693, -1, -0.7071073, -0.7071063, -1, -0.83147, -0.5555697, -1, -0.9238799, -0.3826827, -1, 0, 0, 0, -0.9807854, -0.1950894, -1, -1, 9.65599e-7, -1, -0.9807851, 0.1950913, -1, -0.9238791, 0.3826845, -1, -0.8314689, 0.5555713, -1, -0.7071059, 0.7071077, -1, -0.5555691, 0.8314704, -1, -0.3826821, 0.9238801, -1, -0.1950888, 0.9807856, -1 + }; + static const uint32_t cone_triangle_count = 62; + static const uint32_t cone_triangle_indices[cone_triangle_count * 3] = { + 0, 23, 1, 1, 23, 2, 2, 23, 3, 3, 23, 4, 4, 23, 5, 5, 23, 6, 6, 23, 7, 7, 23, 8, 8, 23, 9, 9, 23, 10, 10, 23, 11, 11, 23, 12, 12, 23, 13, 13, 23, 14, 14, 23, 15, 15, 23, 16, 16, 23, 17, 17, 23, 18, 18, 23, 19, 19, 23, 20, 20, 23, 21, 21, 23, 22, 22, 23, 24, 24, 23, 25, 25, 23, 26, 26, 23, 27, 27, 23, 28, 28, 23, 29, 29, 23, 30, 30, 23, 31, 31, 23, 32, 32, 23, 0, 7, 15, 24, 32, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 3, 6, 7, 3, 7, 8, 9, 9, 10, 7, 10, 11, 7, 11, 12, 15, 12, 13, 15, 13, 14, 15, 15, 16, 17, 17, 18, 19, 19, 20, 24, 20, 21, 24, 21, 22, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 30, 31, 32, 32, 1, 3, 15, 17, 24, 17, 19, 24, 24, 26, 32, 26, 28, 32, 28, 30, 32, 32, 3, 7, 7, 11, 15, 32, 7, 24 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * cone_vertex_count * 3); + copymem(vertex_data.ptrw(), cone_vertices, vertex_data.size()); + + cone_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * cone_triangle_count * 3); + copymem(index_data.ptrw(), cone_triangle_indices, index_data.size()); + + cone_index_buffer = RD::get_singleton()->index_buffer_create(cone_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(cone_vertex_buffer); + + cone_vertex_array = RD::get_singleton()->vertex_array_create(cone_vertex_count, vertex_format, buffers); + + cone_index_array = RD::get_singleton()->index_array_create(cone_index_buffer, 0, cone_triangle_count * 3); + + float min_d = 1e20; + for (uint32_t i = 0; i < cone_triangle_count; i++) { + Vector3 vertices[3]; + int32_t zero_index = -1; + for (uint32_t j = 0; j < 3; j++) { + uint32_t index = cone_triangle_indices[i * 3 + j]; + for (uint32_t k = 0; k < 3; k++) { + vertices[j][k] = cone_vertices[index * 3 + k]; + } + if (vertices[j] == Vector3()) { + zero_index = j; + } + } + + if (zero_index != -1) { + Vector3 a = vertices[(zero_index + 1) % 3]; + Vector3 b = vertices[(zero_index + 2) % 3]; + Vector3 c = a + Vector3(0, 0, 1); + Plane p(a, b, c); + min_d = MIN(Math::abs(p.d), min_d); + } + } + cone_overfit = 1.0 / min_d; + } + + { // BOX + static const uint32_t box_vertex_count = 8; + static const float box_vertices[box_vertex_count * 3] = { + -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, -1, 1, 1, 1 + }; + static const uint32_t box_triangle_count = 12; + static const uint32_t box_triangle_indices[box_triangle_count * 3] = { + 1, 2, 0, 3, 6, 2, 7, 4, 6, 5, 0, 4, 6, 0, 2, 3, 5, 7, 1, 3, 2, 3, 7, 6, 7, 5, 4, 5, 1, 0, 6, 4, 0, 3, 1, 5 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * box_vertex_count * 3); + copymem(vertex_data.ptrw(), box_vertices, vertex_data.size()); + + box_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * box_triangle_count * 3); + copymem(index_data.ptrw(), box_triangle_indices, index_data.size()); + + box_index_buffer = RD::get_singleton()->index_buffer_create(box_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(box_vertex_buffer); + + box_vertex_array = RD::get_singleton()->vertex_array_create(box_vertex_count, vertex_format, buffers); + + box_index_array = RD::get_singleton()->index_array_create(box_index_buffer, 0, box_triangle_count * 3); + } +} +ClusterBuilderSharedDataRD::~ClusterBuilderSharedDataRD() { + RD::get_singleton()->free(sphere_vertex_buffer); + RD::get_singleton()->free(sphere_index_buffer); + RD::get_singleton()->free(cone_vertex_buffer); + RD::get_singleton()->free(cone_index_buffer); + RD::get_singleton()->free(box_vertex_buffer); + RD::get_singleton()->free(box_index_buffer); + + cluster_render.cluster_render_shader.version_free(cluster_render.shader_version); + cluster_store.cluster_store_shader.version_free(cluster_store.shader_version); + cluster_debug.cluster_debug_shader.version_free(cluster_debug.shader_version); +} + +///////////////////////////// + +void ClusterBuilderRD::_clear() { + if (cluster_buffer.is_null()) { + return; //nothing to clear + } + RD::get_singleton()->free(cluster_buffer); + RD::get_singleton()->free(cluster_render_buffer); + RD::get_singleton()->free(element_buffer); + cluster_buffer = RID(); + cluster_render_buffer = RID(); + element_buffer = RID(); + + memfree(render_elements); + + render_elements = nullptr; + render_element_max = 0; + render_element_count = 0; + + RD::get_singleton()->free(framebuffer); + framebuffer = RID(); + + cluster_render_uniform_set = RID(); + cluster_store_uniform_set = RID(); +} + +void ClusterBuilderRD::setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer) { + ERR_FAIL_COND(p_max_elements == 0); + ERR_FAIL_COND(p_screen_size.x < 1); + ERR_FAIL_COND(p_screen_size.y < 1); + + _clear(); + + screen_size = p_screen_size; + + cluster_screen_size.width = (p_screen_size.width - 1) / cluster_size + 1; + cluster_screen_size.height = (p_screen_size.height - 1) / cluster_size + 1; + + max_elements_by_type = p_max_elements; + if (max_elements_by_type % 32) { //need to be 32 aligned + max_elements_by_type += 32 - (max_elements_by_type % 32); + } + + cluster_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (max_elements_by_type / 32 + 32) * ELEMENT_TYPE_MAX * 4; + + render_element_max = max_elements_by_type * ELEMENT_TYPE_MAX; + + uint32_t element_tag_bits_size = render_element_max / 32; + uint32_t element_tag_depth_bits_size = render_element_max; + cluster_render_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (element_tag_bits_size + element_tag_depth_bits_size) * 4; // tag bits (element was used) and tag depth (depth range in which it was used) + + cluster_render_buffer = RD::get_singleton()->storage_buffer_create(cluster_render_buffer_size); + cluster_buffer = RD::get_singleton()->storage_buffer_create(cluster_buffer_size); + + render_elements = (RenderElementData *)memalloc(sizeof(RenderElementData *) * render_element_max); + render_element_count = 0; + + element_buffer = RD::get_singleton()->storage_buffer_create(sizeof(RenderElementData) * render_element_max); + + uint32_t div_value = 1 << divisor; + if (use_msaa) { + framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value, RD::TEXTURE_SAMPLES_4); + } else { + framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 1; + u.ids.push_back(state_uniform); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(element_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.ids.push_back(cluster_render_buffer); + uniforms.push_back(u); + } + + cluster_render_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_render.shader, 0); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(cluster_render_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(cluster_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.ids.push_back(element_buffer); + uniforms.push_back(u); + } + + cluster_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_store.shader, 0); + } + + if (p_color_buffer.is_valid()) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(cluster_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(p_color_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 3; + u.ids.push_back(p_depth_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 4; + u.ids.push_back(p_depth_buffer_sampler); + uniforms.push_back(u); + } + + debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_debug.shader, 0); + } else { + debug_uniform_set = RID(); + } +} + +void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y) { + view_xform = p_view_transform.affine_inverse(); + projection = p_cam_projection; + z_near = projection.get_z_near(); + z_far = projection.get_z_far(); + orthogonal = p_cam_projection.is_orthogonal(); + adjusted_projection = projection; + if (!orthogonal) { + adjusted_projection.adjust_perspective_znear(0.0001); + } + + CameraMatrix correction; + correction.set_depth_correction(p_flip_y); + projection = correction * projection; + adjusted_projection = correction * adjusted_projection; + + //reset counts + render_element_count = 0; + for (uint32_t i = 0; i < ELEMENT_TYPE_MAX; i++) { + cluster_count_by_type[i] = 0; + } +} + +void ClusterBuilderRD::bake_cluster() { + RENDER_TIMESTAMP(">Bake Cluster"); + + //clear cluster buffer + RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, true); + + if (render_element_count > 0) { + //clear render buffer + RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, true); + + { //fill state uniform + + StateUniform state; + + RendererStorageRD::store_camera(adjusted_projection, state.projection); + state.inv_z_far = 1.0 / z_far; + state.screen_to_clusters_shift = get_shift_from_power_of_2(cluster_size); + state.screen_to_clusters_shift -= divisor; //screen is smaller, shift one less + + state.cluster_screen_width = cluster_screen_size.x; + state.cluster_depth_offset = (render_element_max / 32); + state.cluster_data_size = state.cluster_depth_offset + render_element_max; + + RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, true); + } + + //update instances + + RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, true); + + RENDER_TIMESTAMP("Render Elements"); + + //render elements + { + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD); + ClusterBuilderSharedDataRD::ClusterRender::PushConstant push_constant = {}; + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shared->cluster_render.shader_pipelines[use_msaa ? ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_MSAA : ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_NORMAL]); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, cluster_render_uniform_set, 0); + + for (uint32_t i = 0; i < render_element_count;) { + push_constant.base_index = i; + switch (render_elements[i].type) { + case ELEMENT_TYPE_OMNI_LIGHT: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->sphere_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->sphere_index_array); + } break; + case ELEMENT_TYPE_SPOT_LIGHT: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->cone_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->cone_index_array); + } break; + case ELEMENT_TYPE_DECAL: + case ELEMENT_TYPE_REFLECTION_PROBE: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->box_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->box_index_array); + } break; + } + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterRender::PushConstant)); + + uint32_t instances = 1; +#if 0 + for (uint32_t j = i+1; j < element_count; j++) { + if (elements[i].type!=elements[j].type) { + break; + } + instances++; + } +#endif + RD::get_singleton()->draw_list_draw(draw_list, true, instances); + i += instances; + } + RD::get_singleton()->draw_list_end(); + } + //store elements + RENDER_TIMESTAMP("Pack Elements"); + + { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_store.shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cluster_store_uniform_set, 0); + + ClusterBuilderSharedDataRD::ClusterStore::PushConstant push_constant; + push_constant.cluster_render_data_size = render_element_max / 32 + render_element_max; + push_constant.max_render_element_count_div_32 = render_element_max / 32; + push_constant.cluster_screen_size[0] = cluster_screen_size.x; + push_constant.cluster_screen_size[1] = cluster_screen_size.y; + push_constant.render_element_count_div_32 = render_element_count > 0 ? (render_element_count - 1) / 32 + 1 : 0; + push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32; + push_constant.pad1 = 0; + push_constant.pad2 = 0; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1, 8, 8, 1); + + RD::get_singleton()->compute_list_end(); + } + } + RENDER_TIMESTAMP("<Bake Cluster"); +} + +void ClusterBuilderRD::debug(ElementType p_element) { + ERR_FAIL_COND(debug_uniform_set.is_null()); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_debug.shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, debug_uniform_set, 0); + + ClusterBuilderSharedDataRD::ClusterDebug::PushConstant push_constant; + push_constant.screen_size[0] = screen_size.x; + push_constant.screen_size[1] = screen_size.y; + push_constant.cluster_screen_size[0] = cluster_screen_size.x; + push_constant.cluster_screen_size[1] = cluster_screen_size.y; + push_constant.cluster_shift = get_shift_from_power_of_2(cluster_size); + push_constant.cluster_type = p_element; + push_constant.orthogonal = orthogonal; + push_constant.z_far = z_far; + push_constant.z_near = z_near; + push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1, 8, 8, 1); + + RD::get_singleton()->compute_list_end(); +} + +RID ClusterBuilderRD::get_cluster_buffer() const { + return cluster_buffer; +} + +uint32_t ClusterBuilderRD::get_cluster_size() const { + return cluster_size; +} + +uint32_t ClusterBuilderRD::get_max_cluster_elements() const { + return max_elements_by_type; +} + +void ClusterBuilderRD::set_shared(ClusterBuilderSharedDataRD *p_shared) { + shared = p_shared; +} + +ClusterBuilderRD::ClusterBuilderRD() { + state_uniform = RD::get_singleton()->uniform_buffer_create(sizeof(StateUniform)); +} + +ClusterBuilderRD::~ClusterBuilderRD() { + _clear(); + RD::get_singleton()->free(state_uniform); +} diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.h b/servers/rendering/renderer_rd/cluster_builder_rd.h new file mode 100644 index 0000000000..dc1707b534 --- /dev/null +++ b/servers/rendering/renderer_rd/cluster_builder_rd.h @@ -0,0 +1,378 @@ +/*************************************************************************/ +/* cluster_builder_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef CLUSTER_BUILDER_RD_H +#define CLUSTER_BUILDER_RD_H + +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h" + +class ClusterBuilderSharedDataRD { + friend class ClusterBuilderRD; + + RID sphere_vertex_buffer; + RID sphere_vertex_array; + RID sphere_index_buffer; + RID sphere_index_array; + float sphere_overfit = 0.0; //because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area + + RID cone_vertex_buffer; + RID cone_vertex_array; + RID cone_index_buffer; + RID cone_index_array; + float cone_overfit = 0.0; //because an cone mesh is not a perfect sphere, we need to enlarge it to cover the actual cone area + + RID box_vertex_buffer; + RID box_vertex_array; + RID box_index_buffer; + RID box_index_array; + + enum Divisor { + DIVISOR_1, + DIVISOR_2, + DIVISOR_4, + }; + + struct ClusterRender { + struct PushConstant { + uint32_t base_index; + uint32_t pad0; + uint32_t pad1; + uint32_t pad2; + }; + + ClusterRenderShaderRD cluster_render_shader; + RID shader_version; + RID shader; + enum PipelineVersion { + PIPELINE_NORMAL, + PIPELINE_MSAA, + PIPELINE_MAX + }; + + RID shader_pipelines[PIPELINE_MAX]; + } cluster_render; + + struct ClusterStore { + struct PushConstant { + uint32_t cluster_render_data_size; // how much data for a single cluster takes + uint32_t max_render_element_count_div_32; //divided by 32 + uint32_t cluster_screen_size[2]; + uint32_t render_element_count_div_32; //divided by 32 + uint32_t max_cluster_element_count_div_32; //divided by 32 + uint32_t pad1; + uint32_t pad2; + }; + + ClusterStoreShaderRD cluster_store_shader; + RID shader_version; + RID shader; + RID shader_pipeline; + } cluster_store; + + struct ClusterDebug { + struct PushConstant { + uint32_t screen_size[2]; + uint32_t cluster_screen_size[2]; + + uint32_t cluster_shift; + uint32_t cluster_type; + float z_near; + float z_far; + + uint32_t orthogonal; + uint32_t max_cluster_element_count_div_32; + uint32_t pad1; + uint32_t pad2; + }; + + ClusterDebugShaderRD cluster_debug_shader; + RID shader_version; + RID shader; + RID shader_pipeline; + } cluster_debug; + +public: + ClusterBuilderSharedDataRD(); + ~ClusterBuilderSharedDataRD(); +}; + +class ClusterBuilderRD { +public: + enum LightType { + LIGHT_TYPE_OMNI, + LIGHT_TYPE_SPOT + }; + + enum BoxType { + BOX_TYPE_REFLECTION_PROBE, + BOX_TYPE_DECAL, + }; + + enum ElementType { + ELEMENT_TYPE_OMNI_LIGHT, + ELEMENT_TYPE_SPOT_LIGHT, + ELEMENT_TYPE_DECAL, + ELEMENT_TYPE_REFLECTION_PROBE, + ELEMENT_TYPE_MAX, + + }; + +private: + ClusterBuilderSharedDataRD *shared = nullptr; + + struct RenderElementData { + uint32_t type; //0-4 + uint32_t touches_near; + uint32_t touches_far; + uint32_t original_index; + float transform_inv[12]; //transposed transform for less space + float scale[3]; + uint32_t pad; + }; + + uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {}; + uint32_t max_elements_by_type = 0; + + RenderElementData *render_elements = nullptr; + uint32_t render_element_count = 0; + uint32_t render_element_max = 0; + + Transform view_xform; + CameraMatrix adjusted_projection; + CameraMatrix projection; + float z_far = 0; + float z_near = 0; + bool orthogonal = false; + + enum Divisor { + DIVISOR_1, + DIVISOR_2, + DIVISOR_4, + }; + + uint32_t cluster_size = 32; + bool use_msaa = true; + Divisor divisor = DIVISOR_4; + + Size2i screen_size; + Size2i cluster_screen_size; + + RID framebuffer; + RID cluster_render_buffer; //used for creating + RID cluster_buffer; //used for rendering + RID element_buffer; //used for storing, to hint element touches far plane or near plane + uint32_t cluster_render_buffer_size = 0; + uint32_t cluster_buffer_size = 0; + + RID cluster_render_uniform_set; + RID cluster_store_uniform_set; + + //persistent data + + void _clear(); + + struct StateUniform { + float projection[16]; + float inv_z_far; + uint32_t screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint32_t cluster_screen_width; // + uint32_t cluster_data_size; // how much data for a single cluster takes + uint32_t cluster_depth_offset; + uint32_t pad0; + uint32_t pad1; + uint32_t pad2; + }; + + RID state_uniform; + + RID debug_uniform_set; + +public: + void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer); + + void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y); + + _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) { + if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) { + return; //max number elements reached + } + if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) { + return; //max number elements reached + } + + RenderElementData &e = render_elements[render_element_count]; + + Transform xform = view_xform * p_transform; + + float radius = xform.basis.get_uniform_scale(); + if (radius > 0.98 || radius < 1.02) { + xform.basis.orthonormalize(); + } + + radius *= p_radius; + + if (p_type == LIGHT_TYPE_OMNI) { + radius *= shared->sphere_overfit; // overfit icosphere + + //omni + float depth = -xform.origin.z; + if (orthogonal) { + e.touches_near = (depth - radius) < z_near; + } else { + //contains camera inside light + float radius2 = radius * shared->sphere_overfit; // overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex) + e.touches_near = xform.origin.length_squared() < radius2 * radius2; + } + + e.touches_far = (depth + radius) > z_far; + e.scale[0] = radius; + e.scale[1] = radius; + e.scale[2] = radius; + e.type = ELEMENT_TYPE_OMNI_LIGHT; + e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]; + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++; + + } else { + //spot + radius *= shared->cone_overfit; // overfit icosphere + + real_t len = Math::tan(Math::deg2rad(p_spot_aperture)) * radius; + //approximate, probably better to use a cone support function + float max_d = -1e20; + float min_d = 1e20; +#define CONE_MINMAX(m_x, m_y) \ + { \ + float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \ + min_d = MIN(d, min_d); \ + max_d = MAX(d, max_d); \ + } + + CONE_MINMAX(1, 1); + CONE_MINMAX(-1, 1); + CONE_MINMAX(-1, -1); + CONE_MINMAX(1, -1); + + if (orthogonal) { + e.touches_near = min_d < z_near; + } else { + //contains camera inside light + Plane base_plane(xform.origin, -xform.basis.get_axis(Vector3::AXIS_Z)); + float dist = base_plane.distance_to(Vector3()); + if (dist >= 0 && dist < radius) { + //inside, check angle + float angle = Math::rad2deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_axis(Vector3::AXIS_Z)))); + e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit + } else { + e.touches_near = false; + } + } + + e.touches_far = max_d > z_far; + + e.scale[0] = len * shared->cone_overfit; + e.scale[1] = len * shared->cone_overfit; + e.scale[2] = radius; + + e.type = ELEMENT_TYPE_SPOT_LIGHT; + e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; //use omni since they share index + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++; + } + + render_element_count++; + } + + _FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform &p_transform, const Vector3 &p_half_extents) { + if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) { + return; //max number elements reached + } + if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) { + return; //max number elements reached + } + + RenderElementData &e = render_elements[render_element_count]; + Transform xform = view_xform * p_transform; + + //extract scale and scale the matrix by it, makes things simpler + Vector3 scale = p_half_extents; + for (uint32_t i = 0; i < 3; i++) { + float s = xform.basis.elements[i].length(); + scale[i] *= s; + xform.basis.elements[i] /= s; + }; + + float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale)); + float depth = -xform.origin.z; + + if (orthogonal) { + e.touches_near = depth - box_depth < z_near; + } else { + //contains camera inside box + Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs(); + e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z; + } + + e.touches_far = depth + box_depth > z_far; + + e.scale[0] = scale.x; + e.scale[1] = scale.y; + e.scale[2] = scale.z; + + e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE; + e.original_index = cluster_count_by_type[e.type]; + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[e.type]++; + render_element_count++; + } + + void bake_cluster(); + void debug(ElementType p_element); + + RID get_cluster_buffer() const; + uint32_t get_cluster_size() const; + uint32_t get_max_cluster_elements() const; + + void set_shared(ClusterBuilderSharedDataRD *p_shared); + + ClusterBuilderRD(); + ~ClusterBuilderRD(); +}; + +#endif // CLUSTER_BUILDER_H diff --git a/servers/rendering/renderer_rd/light_cluster_builder.cpp b/servers/rendering/renderer_rd/light_cluster_builder.cpp deleted file mode 100644 index bb807ca4ca..0000000000 --- a/servers/rendering/renderer_rd/light_cluster_builder.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/*************************************************************************/ -/* light_cluster_builder.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "light_cluster_builder.h" - -void LightClusterBuilder::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection) { - view_xform = p_view_transform; - projection = p_cam_projection; - z_near = -projection.get_z_near(); - z_far = -projection.get_z_far(); - - //reset counts - light_count = 0; - refprobe_count = 0; - decal_count = 0; - item_count = 0; - sort_id_count = 0; -} - -void LightClusterBuilder::bake_cluster() { - float slice_depth = (z_near - z_far) / depth; - - uint8_t *cluster_dataw = cluster_data.ptrw(); - Cell *cluster_data_ptr = (Cell *)cluster_dataw; - //clear the cluster - zeromem(cluster_data_ptr, (width * height * depth * sizeof(Cell))); - - /* Step 1, create cell positions and count them */ - - for (uint32_t i = 0; i < item_count; i++) { - const Item &item = items[i]; - - int from_slice = Math::floor((z_near - (item.aabb.position.z + item.aabb.size.z)) / slice_depth); - int to_slice = Math::floor((z_near - item.aabb.position.z) / slice_depth); - - if (from_slice >= (int)depth || to_slice < 0) { - continue; //sorry no go - } - - from_slice = MAX(0, from_slice); - to_slice = MIN((int)depth - 1, to_slice); - - for (int j = from_slice; j <= to_slice; j++) { - Vector3 min = item.aabb.position; - Vector3 max = item.aabb.position + item.aabb.size; - - float limit_near = MIN((z_near - slice_depth * j), max.z); - float limit_far = MAX((z_near - slice_depth * (j + 1)), min.z); - - max.z = limit_near; - min.z = limit_near; - - Vector3 proj_min = projection.xform(min); - Vector3 proj_max = projection.xform(max); - - int near_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width)); - int near_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height)); - int near_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width)); - int near_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height)); - - max.z = limit_far; - min.z = limit_far; - - proj_min = projection.xform(min); - proj_max = projection.xform(max); - - int far_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width)); - int far_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height)); - int far_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width)); - int far_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height)); - - //print_line(itos(j) + " near - " + Vector2i(near_from_x, near_from_y) + " -> " + Vector2i(near_to_x, near_to_y)); - //print_line(itos(j) + " far - " + Vector2i(far_from_x, far_from_y) + " -> " + Vector2i(far_to_x, far_to_y)); - - int from_x = MIN(near_from_x, far_from_x); - int from_y = MIN(near_from_y, far_from_y); - int to_x = MAX(near_to_x, far_to_x); - int to_y = MAX(near_to_y, far_to_y); - - if (from_x >= (int)width || to_x < 0 || from_y >= (int)height || to_y < 0) { - continue; - } - - int sx = MAX(0, from_x); - int sy = MAX(0, from_y); - int dx = MIN((int)width - 1, to_x); - int dy = MIN((int)height - 1, to_y); - - //print_line(itos(j) + " - " + Vector2i(sx, sy) + " -> " + Vector2i(dx, dy)); - - for (int x = sx; x <= dx; x++) { - for (int y = sy; y <= dy; y++) { - uint32_t offset = j * (width * height) + y * width + x; - - if (unlikely(sort_id_count == sort_id_max)) { - sort_id_max = nearest_power_of_2_templated(sort_id_max + 1); - sort_ids = (SortID *)memrealloc(sort_ids, sizeof(SortID) * sort_id_max); - if (ids.size()) { - ids.resize(sort_id_max); - RD::get_singleton()->free(items_buffer); - items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * sort_id_max); - } - } - - sort_ids[sort_id_count].cell_index = offset; - sort_ids[sort_id_count].item_index = item.index; - sort_ids[sort_id_count].item_type = item.type; - - sort_id_count++; - - //for now, only count - cluster_data_ptr[offset].item_pointers[item.type]++; - //print_line("at offset " + itos(offset) + " value: " + itos(cluster_data_ptr[offset].item_pointers[item.type])); - } - } - } - } - - /* Step 2, Assign pointers (and reset counters) */ - - uint32_t offset = 0; - for (uint32_t i = 0; i < (width * height * depth); i++) { - for (int j = 0; j < ITEM_TYPE_MAX; j++) { - uint32_t count = cluster_data_ptr[i].item_pointers[j]; //save count - cluster_data_ptr[i].item_pointers[j] = offset; //replace count by pointer - offset += count; //increase offset by count; - } - } - - //print_line("offset: " + itos(offset)); - /* Step 3, Place item lists */ - - uint32_t *ids_ptr = ids.ptrw(); - - for (uint32_t i = 0; i < sort_id_count; i++) { - const SortID &id = sort_ids[i]; - Cell &cell = cluster_data_ptr[id.cell_index]; - uint32_t pointer = cell.item_pointers[id.item_type] & POINTER_MASK; - uint32_t counter = cell.item_pointers[id.item_type] >> COUNTER_SHIFT; - ids_ptr[pointer + counter] = id.item_index; - - cell.item_pointers[id.item_type] = pointer | ((counter + 1) << COUNTER_SHIFT); - } - - RD::get_singleton()->texture_update(cluster_texture, 0, cluster_data, true); - RD::get_singleton()->buffer_update(items_buffer, 0, offset * sizeof(uint32_t), ids_ptr, true); -} - -void LightClusterBuilder::setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth) { - if (width == p_width && height == p_height && depth == p_depth) { - return; - } - if (cluster_texture.is_valid()) { - RD::get_singleton()->free(cluster_texture); - } - - width = p_width; - height = p_height; - depth = p_depth; - - cluster_data.resize(width * height * depth * sizeof(Cell)); - - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32G32B32A32_UINT; - tf.texture_type = RD::TEXTURE_TYPE_3D; - tf.width = width; - tf.height = height; - tf.depth = depth; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; - - cluster_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } -} - -RID LightClusterBuilder::get_cluster_texture() const { - return cluster_texture; -} - -RID LightClusterBuilder::get_cluster_indices_buffer() const { - return items_buffer; -} - -LightClusterBuilder::LightClusterBuilder() { - //initialize accumulators to something - lights = (LightData *)memalloc(sizeof(LightData) * 1024); - light_max = 1024; - - refprobes = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * 1024); - refprobe_max = 1024; - - decals = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * 1024); - decal_max = 1024; - - items = (Item *)memalloc(sizeof(Item) * 1024); - item_max = 1024; - - sort_ids = (SortID *)memalloc(sizeof(SortID) * 1024); - ids.resize(2014); - items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 1024); - item_max = 1024; -} - -LightClusterBuilder::~LightClusterBuilder() { - if (cluster_data.size()) { - RD::get_singleton()->free(cluster_texture); - } - - if (lights) { - memfree(lights); - } - if (refprobes) { - memfree(refprobes); - } - if (decals) { - memfree(decals); - } - if (items) { - memfree(items); - } - if (sort_ids) { - memfree(sort_ids); - RD::get_singleton()->free(items_buffer); - } -} diff --git a/servers/rendering/renderer_rd/light_cluster_builder.h b/servers/rendering/renderer_rd/light_cluster_builder.h deleted file mode 100644 index 8f77ece6f5..0000000000 --- a/servers/rendering/renderer_rd/light_cluster_builder.h +++ /dev/null @@ -1,290 +0,0 @@ -/*************************************************************************/ -/* light_cluster_builder.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef LIGHT_CLUSTER_BUILDER_H -#define LIGHT_CLUSTER_BUILDER_H - -#include "servers/rendering/renderer_rd/renderer_storage_rd.h" - -class LightClusterBuilder { -public: - enum LightType { - LIGHT_TYPE_OMNI, - LIGHT_TYPE_SPOT - }; - - enum ItemType { - ITEM_TYPE_OMNI_LIGHT, - ITEM_TYPE_SPOT_LIGHT, - ITEM_TYPE_REFLECTION_PROBE, - ITEM_TYPE_DECAL, - ITEM_TYPE_MAX //should always be 4 - }; - - enum { - COUNTER_SHIFT = 20, //one million total ids - POINTER_MASK = (1 << COUNTER_SHIFT) - 1, - COUNTER_MASK = 0xfff // 4096 items per cell - }; - -private: - struct LightData { - float position[3]; - uint32_t type; - float radius; - float spot_aperture; - uint32_t pad[2]; - }; - - uint32_t light_count = 0; - uint32_t light_max = 0; - LightData *lights = nullptr; - - struct OrientedBoxData { - float position[3]; - uint32_t pad; - float x_axis[3]; - uint32_t pad2; - float y_axis[3]; - uint32_t pad3; - float z_axis[3]; - uint32_t pad4; - }; - - uint32_t refprobe_count = 0; - uint32_t refprobe_max = 0; - OrientedBoxData *refprobes = nullptr; - - uint32_t decal_count = 0; - uint32_t decal_max = 0; - OrientedBoxData *decals = nullptr; - - struct Item { - AABB aabb; - ItemType type; - uint32_t index; - }; - - Item *items = nullptr; - uint32_t item_count = 0; - uint32_t item_max = 0; - - uint32_t width = 0; - uint32_t height = 0; - uint32_t depth = 0; - - struct Cell { - uint32_t item_pointers[ITEM_TYPE_MAX]; - }; - - Vector<uint8_t> cluster_data; - RID cluster_texture; - - struct SortID { - uint32_t cell_index; - uint32_t item_index; - ItemType item_type; - }; - - SortID *sort_ids = nullptr; - Vector<uint32_t> ids; - uint32_t sort_id_count = 0; - uint32_t sort_id_max = 0; - RID items_buffer; - - Transform view_xform; - CameraMatrix projection; - float z_far = 0; - float z_near = 0; - - _FORCE_INLINE_ void _add_item(const AABB &p_aabb, ItemType p_type, uint32_t p_index) { - if (unlikely(item_count == item_max)) { - item_max = nearest_power_of_2_templated(item_max + 1); - items = (Item *)memrealloc(items, sizeof(Item) * item_max); - } - - Item &item = items[item_count]; - item.aabb = p_aabb; - item.index = p_index; - item.type = p_type; - item_count++; - } - -public: - void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection); - - _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) { - if (unlikely(light_count == light_max)) { - light_max = nearest_power_of_2_templated(light_max + 1); - lights = (LightData *)memrealloc(lights, sizeof(LightData) * light_max); - } - - LightData &ld = lights[light_count]; - ld.type = p_type; - ld.position[0] = p_transform.origin.x; - ld.position[1] = p_transform.origin.y; - ld.position[2] = p_transform.origin.z; - ld.radius = p_radius; - ld.spot_aperture = p_spot_aperture; - - Transform xform = view_xform * p_transform; - - ld.radius *= xform.basis.get_uniform_scale(); - - AABB aabb; - - switch (p_type) { - case LIGHT_TYPE_OMNI: { - aabb.position = xform.origin; - aabb.size = Vector3(ld.radius, ld.radius, ld.radius); - aabb.position -= aabb.size; - aabb.size *= 2.0; - - _add_item(aabb, ITEM_TYPE_OMNI_LIGHT, light_count); - } break; - case LIGHT_TYPE_SPOT: { - float r = ld.radius; - real_t len = Math::tan(Math::deg2rad(ld.spot_aperture)) * r; - - aabb.position = xform.origin; - aabb.expand_to(xform.xform(Vector3(len, len, -r))); - aabb.expand_to(xform.xform(Vector3(-len, len, -r))); - aabb.expand_to(xform.xform(Vector3(-len, -len, -r))); - aabb.expand_to(xform.xform(Vector3(len, -len, -r))); - _add_item(aabb, ITEM_TYPE_SPOT_LIGHT, light_count); - } break; - } - - light_count++; - } - - _FORCE_INLINE_ void add_reflection_probe(const Transform &p_transform, const Vector3 &p_half_extents) { - if (unlikely(refprobe_count == refprobe_max)) { - refprobe_max = nearest_power_of_2_templated(refprobe_max + 1); - refprobes = (OrientedBoxData *)memrealloc(refprobes, sizeof(OrientedBoxData) * refprobe_max); - } - - Transform xform = view_xform * p_transform; - - OrientedBoxData &rp = refprobes[refprobe_count]; - Vector3 origin = xform.origin; - rp.position[0] = origin.x; - rp.position[1] = origin.y; - rp.position[2] = origin.z; - - Vector3 x_axis = xform.basis.get_axis(0) * p_half_extents.x; - rp.x_axis[0] = x_axis.x; - rp.x_axis[1] = x_axis.y; - rp.x_axis[2] = x_axis.z; - - Vector3 y_axis = xform.basis.get_axis(1) * p_half_extents.y; - rp.y_axis[0] = y_axis.x; - rp.y_axis[1] = y_axis.y; - rp.y_axis[2] = y_axis.z; - - Vector3 z_axis = xform.basis.get_axis(2) * p_half_extents.z; - rp.z_axis[0] = z_axis.x; - rp.z_axis[1] = z_axis.y; - rp.z_axis[2] = z_axis.z; - - AABB aabb; - - aabb.position = origin + x_axis + y_axis + z_axis; - aabb.expand_to(origin + x_axis + y_axis - z_axis); - aabb.expand_to(origin + x_axis - y_axis + z_axis); - aabb.expand_to(origin + x_axis - y_axis - z_axis); - aabb.expand_to(origin - x_axis + y_axis + z_axis); - aabb.expand_to(origin - x_axis + y_axis - z_axis); - aabb.expand_to(origin - x_axis - y_axis + z_axis); - aabb.expand_to(origin - x_axis - y_axis - z_axis); - - _add_item(aabb, ITEM_TYPE_REFLECTION_PROBE, refprobe_count); - - refprobe_count++; - } - - _FORCE_INLINE_ void add_decal(const Transform &p_transform, const Vector3 &p_half_extents) { - if (unlikely(decal_count == decal_max)) { - decal_max = nearest_power_of_2_templated(decal_max + 1); - decals = (OrientedBoxData *)memrealloc(decals, sizeof(OrientedBoxData) * decal_max); - } - - Transform xform = view_xform * p_transform; - - OrientedBoxData &dc = decals[decal_count]; - - Vector3 origin = xform.origin; - dc.position[0] = origin.x; - dc.position[1] = origin.y; - dc.position[2] = origin.z; - - Vector3 x_axis = xform.basis.get_axis(0) * p_half_extents.x; - dc.x_axis[0] = x_axis.x; - dc.x_axis[1] = x_axis.y; - dc.x_axis[2] = x_axis.z; - - Vector3 y_axis = xform.basis.get_axis(1) * p_half_extents.y; - dc.y_axis[0] = y_axis.x; - dc.y_axis[1] = y_axis.y; - dc.y_axis[2] = y_axis.z; - - Vector3 z_axis = xform.basis.get_axis(2) * p_half_extents.z; - dc.z_axis[0] = z_axis.x; - dc.z_axis[1] = z_axis.y; - dc.z_axis[2] = z_axis.z; - - AABB aabb; - - aabb.position = origin + x_axis + y_axis + z_axis; - aabb.expand_to(origin + x_axis + y_axis - z_axis); - aabb.expand_to(origin + x_axis - y_axis + z_axis); - aabb.expand_to(origin + x_axis - y_axis - z_axis); - aabb.expand_to(origin - x_axis + y_axis + z_axis); - aabb.expand_to(origin - x_axis + y_axis - z_axis); - aabb.expand_to(origin - x_axis - y_axis + z_axis); - aabb.expand_to(origin - x_axis - y_axis - z_axis); - - _add_item(aabb, ITEM_TYPE_DECAL, decal_count); - - decal_count++; - } - - void bake_cluster(); - - void setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth); - - RID get_cluster_texture() const; - RID get_cluster_indices_buffer() const; - - LightClusterBuilder(); - ~LightClusterBuilder(); -}; - -#endif // LIGHT_CLUSTER_BUILDER_H diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp index 74556f8105..f3b09399f9 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -1071,7 +1071,7 @@ void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters } } -void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) { +void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) { //CameraMatrix projection = p_cam_projection; //projection.flip_y(); // Vulkan and modern APIs use Y-Down CameraMatrix correction; @@ -1099,8 +1099,18 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren scene_state.ubo.penumbra_shadow_samples = penumbra_shadow_samples_get(); scene_state.ubo.soft_shadow_samples = soft_shadow_samples_get(); - scene_state.ubo.screen_pixel_size[0] = p_screen_pixel_size.x; - scene_state.ubo.screen_pixel_size[1] = p_screen_pixel_size.y; + Size2 screen_pixel_size = Vector2(1.0, 1.0) / Size2(p_screen_size); + scene_state.ubo.screen_pixel_size[0] = screen_pixel_size.x; + scene_state.ubo.screen_pixel_size[1] = screen_pixel_size.y; + + scene_state.ubo.cluster_shift = get_shift_from_power_of_2(p_cluster_size); + scene_state.ubo.max_cluster_element_count_div_32 = p_max_cluster_elements / 32; + { + uint32_t cluster_screen_width = (p_screen_size.width - 1) / p_cluster_size + 1; + uint32_t cluster_screen_height = (p_screen_size.height - 1) / p_cluster_size + 1; + scene_state.ubo.cluster_type_size = cluster_screen_width * cluster_screen_height * (scene_state.ubo.max_cluster_element_count_div_32 + 32); + scene_state.ubo.cluster_width = cluster_screen_width; + } if (p_shadow_atlas.is_valid()) { Vector2 sas = shadow_atlas_get_size(p_shadow_atlas); @@ -1489,7 +1499,7 @@ void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<RID> &p_light } } -void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { +void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { RenderBufferDataForward *render_buffer = nullptr; if (p_render_buffer.is_valid()) { render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer); @@ -1522,7 +1532,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf scene_state.ubo.viewport_size[1] = vp_he.y; scene_state.ubo.directional_light_count = p_directional_light_count; - Size2 screen_pixel_size; Size2i screen_size; RID opaque_framebuffer; RID opaque_specular_framebuffer; @@ -1537,8 +1546,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf bool using_giprobe = false; if (render_buffer) { - screen_pixel_size.width = 1.0 / render_buffer->width; - screen_pixel_size.height = 1.0 / render_buffer->height; screen_size.x = render_buffer->width; screen_size.y = render_buffer->height; @@ -1595,8 +1602,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf alpha_framebuffer = opaque_framebuffer; } else if (p_reflection_probe.is_valid()) { uint32_t resolution = reflection_probe_instance_get_resolution(p_reflection_probe); - screen_pixel_size.width = 1.0 / resolution; - screen_pixel_size.height = 1.0 / resolution; screen_size.x = resolution; screen_size.y = resolution; @@ -1613,7 +1618,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf _setup_lightmaps(p_lightmaps, p_cam_transform); _setup_giprobes(p_gi_probes); - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); _update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example) @@ -1703,7 +1708,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION; bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES; - bool depth_pre_pass = !low_end && depth_framebuffer.is_valid(); bool using_ssao = depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment); @@ -1711,7 +1715,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf if (depth_pre_pass) { //depth pre pass RENDER_TIMESTAMP("Render Depth Pre-Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); bool finish_depth = using_ssao || using_sdfgi || using_giprobe; RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); @@ -1738,11 +1742,11 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf _process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->ambient_buffer, render_buffer->reflection_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probes); } - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); RENDER_TIMESTAMP("Render Opaque Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_gi_probes, p_lightmaps); + RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps); bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss; bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss; @@ -1844,7 +1848,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf RENDER_TIMESTAMP("Render Transparent Pass"); - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); render_list.sort_by_reverse_depth_and_priority(true); @@ -1867,7 +1871,7 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1; - _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_zfar, false, p_use_pancake); + _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_zfar, false, p_use_pancake); if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { p_screen_lod_threshold = 0.0; @@ -1877,7 +1881,7 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr _fill_render_list(p_instances, pass_mode, p_projection, p_transform); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Shadow"); @@ -1899,13 +1903,13 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, scene_state.ubo.dual_paraboloid_side = 0; - _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false); + _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false); PassMode pass_mode = PASS_MODE_SHADOW; _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Collider Heightield"); @@ -1928,12 +1932,12 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo scene_state.ubo.dual_paraboloid_side = 0; scene_state.ubo.material_uv2_mode = true; - _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); + _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); @@ -1964,12 +1968,12 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance * scene_state.ubo.dual_paraboloid_side = 0; scene_state.ubo.material_uv2_mode = true; - _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); + _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; _fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform()); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); @@ -2079,7 +2083,7 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto RendererStorageRD::store_transform(to_bounds.affine_inverse() * cam_xform, scene_state.ubo.sdf_to_bounds); - _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); + _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size); if (!E) { @@ -2150,20 +2154,27 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { RD::Uniform u; u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.ids.push_back(get_positional_light_buffer()); + u.ids.push_back(get_omni_light_buffer()); uniforms.push_back(u); } - { RD::Uniform u; u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.ids.push_back(get_reflection_probe_buffer()); + u.ids.push_back(get_spot_light_buffer()); uniforms.push_back(u); } + { RD::Uniform u; u.binding = 7; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(get_reflection_probe_buffer()); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(get_directional_light_buffer()); uniforms.push_back(u); @@ -2210,21 +2221,6 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { RD::Uniform u; u.binding = 15; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.ids.push_back(get_cluster_builder_texture()); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 16; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.ids.push_back(get_cluster_builder_indices_buffer()); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.binding = 17; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (directional_shadow_get_texture().is_valid()) { u.ids.push_back(directional_shadow_get_texture()); } else { @@ -2236,7 +2232,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 18; + u.binding = 16; u.ids.push_back(storage->global_variables_get_storage_buffer()); uniforms.push_back(u); } @@ -2244,7 +2240,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { if (!low_end) { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 19; + u.binding = 17; u.ids.push_back(sdfgi_get_ubo()); uniforms.push_back(u); } @@ -2253,7 +2249,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } } -RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps) { +RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps) { if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { RD::get_singleton()->free(render_pass_uniform_set); } @@ -2351,6 +2347,15 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID cb = p_cluster_buffer.is_valid() ? p_cluster_buffer : default_vec4_xform_buffer; + u.ids.push_back(cb); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2358,17 +2363,18 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 6; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID bbt = rb ? render_buffers_get_back_buffer_texture(p_render_buffers) : RID(); RID texture = bbt.is_valid() ? bbt : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); u.ids.push_back(texture); uniforms.push_back(u); } + if (!low_end) { { RD::Uniform u; - u.binding = 7; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = rb && rb->normal_roughness_buffer.is_valid() ? rb->normal_roughness_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_NORMAL); u.ids.push_back(texture); @@ -2377,7 +2383,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 8; + u.binding = 9; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID aot = rb ? render_buffers_get_ao_texture(p_render_buffers) : RID(); RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2387,7 +2393,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 9; + u.binding = 10; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = rb && rb->ambient_buffer.is_valid() ? rb->ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); u.ids.push_back(texture); @@ -2396,7 +2402,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 10; + u.binding = 11; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = rb && rb->reflection_buffer.is_valid() ? rb->reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); u.ids.push_back(texture); @@ -2404,7 +2410,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 11; + u.binding = 12; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID t; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { @@ -2417,7 +2423,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 12; + u.binding = 13; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers)); @@ -2428,14 +2434,14 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 13; + u.binding = 14; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 14; + u.binding = 15; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID vfog = RID(); if (rb && render_buffers_has_volumetric_fog(p_render_buffers)) { @@ -2519,33 +2525,43 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed uniforms.push_back(u); } + + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID cb = default_vec4_xform_buffer; + u.ids.push_back(cb); + uniforms.push_back(u); + } + // actual sdfgi stuff { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 5; + u.binding = 6; u.ids.push_back(p_albedo_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 6; + u.binding = 7; u.ids.push_back(p_emission_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 7; + u.binding = 8; u.ids.push_back(p_emission_aniso_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 8; + u.binding = 9; u.ids.push_back(p_geom_facing_texture); uniforms.push_back(u); } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h index 3b5a5ad96f..d4a4c9a3a9 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h @@ -263,7 +263,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { void _update_render_base_uniform_set(); RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture); - RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps); + RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps); struct LightmapData { float normal_xform[12]; @@ -300,6 +300,11 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { float viewport_size[2]; float screen_pixel_size[2]; + uint32_t cluster_shift; + uint32_t cluster_width; + uint32_t cluster_type_size; + uint32_t max_cluster_element_count_div_32; + float directional_penumbra_shadow_kernel[128]; //32 vec4s float directional_soft_shadow_kernel[128]; float penumbra_shadow_kernel[128]; @@ -421,7 +426,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { PASS_MODE_SDF, }; - void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false); + void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false); void _setup_giprobes(const PagedArray<RID> &p_giprobes); void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform); @@ -701,7 +706,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RenderList render_list; protected: - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0); virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 3061511c6d..2e457c2ce6 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -1514,7 +1514,9 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; - push_constant.use_sdfgi = rb->sdfgi != nullptr; + + bool use_sdfgi = rb->sdfgi != nullptr; + bool use_giprobes = push_constant.max_giprobes > 0; if (env) { push_constant.ao_color[0] = env->ao_color.r; @@ -1765,8 +1767,9 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough rb->gi_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi.shader.version_get_shader(gi.shader_version, 0), 0); } + GI::Mode mode = (use_sdfgi && use_giprobes) ? GI::MODE_COMBINED : (use_sdfgi ? GI::MODE_SDFGI : GI::MODE_GIPROBE); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[0]); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[mode]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GI::PushConstant)); RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); @@ -3233,6 +3236,10 @@ RID RendererSceneRenderRD::reflection_atlas_create() { ra.count = GLOBAL_GET("rendering/quality/reflection_atlas/reflection_count"); ra.size = GLOBAL_GET("rendering/quality/reflection_atlas/reflection_size"); + ra.cluster_builder = memnew(ClusterBuilderRD); + ra.cluster_builder->set_shared(&cluster_builder_shared); + ra.cluster_builder->setup(Size2i(ra.size, ra.size), max_cluster_elements, RID(), RID(), RID()); + return reflection_atlas_owner.make_rid(ra); } @@ -3244,6 +3251,8 @@ void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_ref return; //no changes } + ra->cluster_builder->setup(Size2i(ra->size, ra->size), max_cluster_elements, RID(), RID(), RID()); + ra->size = p_reflection_size; ra->count = p_reflection_count; @@ -3253,7 +3262,6 @@ void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_ref ra->reflection = RID(); RD::get_singleton()->free(ra->depth_buffer); ra->depth_buffer = RID(); - for (int i = 0; i < ra->reflections.size(); i++) { _clear_reflection_data(ra->reflections.write[i].data); if (ra->reflections[i].owner.is_null()) { @@ -5884,6 +5892,11 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p rb->msaa = p_msaa; rb->screen_space_aa = p_screen_space_aa; rb->use_debanding = p_use_debanding; + if (rb->cluster_builder == nullptr) { + rb->cluster_builder = memnew(ClusterBuilderRD); + } + rb->cluster_builder->set_shared(&cluster_builder_shared); + _free_render_buffer_data(rb); { @@ -5924,6 +5937,8 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p rb->data->configure(rb->texture, rb->depth_texture, p_width, p_height, p_msaa); _render_buffers_uniform_set_changed(p_render_buffers); + + rb->cluster_builder->setup(Size2i(p_width, p_height), max_cluster_elements, rb->depth_texture, storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED), rb->texture); } void RendererSceneRenderRD::sub_surface_scattering_set_quality(RS::SubSurfaceScatteringQuality p_quality) { @@ -6034,17 +6049,34 @@ RendererSceneRenderRD::RenderBufferData *RendererSceneRenderRD::render_buffers_g } void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment) { + cluster.reflection_count = 0; + for (uint32_t i = 0; i < (uint32_t)p_reflections.size(); i++) { - RID rpi = p_reflections[i]; + if (cluster.reflection_count == cluster.max_reflections) { + break; + } - if (i >= cluster.max_reflections) { - reflection_probe_instance_set_render_index(rpi, 0); //invalid, but something needs to be set + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflections[i]); + if (!rpi) { continue; } - reflection_probe_instance_set_render_index(rpi, i); + cluster.reflection_sort[cluster.reflection_count].instance = rpi; + cluster.reflection_sort[cluster.reflection_count].depth = -p_camera_inverse_transform.xform(rpi->transform.origin).z; + cluster.reflection_count++; + } + + if (cluster.reflection_count > 0) { + SortArray<Cluster::InstanceSort<ReflectionProbeInstance>> sort_array; + sort_array.sort(cluster.reflection_sort, cluster.reflection_count); + } + + for (uint32_t i = 0; i < cluster.reflection_count; i++) { + ReflectionProbeInstance *rpi = cluster.reflection_sort[i].instance; + + rpi->render_index = i; - RID base_probe = reflection_probe_instance_get_probe(rpi); + RID base_probe = rpi->probe; Cluster::ReflectionData &reflection_ubo = cluster.reflections[i]; @@ -6053,7 +6085,7 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti reflection_ubo.box_extents[0] = extents.x; reflection_ubo.box_extents[1] = extents.y; reflection_ubo.box_extents[2] = extents.z; - reflection_ubo.index = reflection_probe_instance_get_atlas_index(rpi); + reflection_ubo.index = rpi->atlas_index; Vector3 origin_offset = storage->reflection_probe_get_origin_offset(base_probe); @@ -6074,29 +6106,38 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy; reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy; - Transform transform = reflection_probe_instance_get_transform(rpi); + Transform transform = rpi->transform; Transform proj = (p_camera_inverse_transform * transform).inverse(); RendererStorageRD::store_transform(proj, reflection_ubo.local_matrix); - cluster.builder.add_reflection_probe(transform, extents); + current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_REFLECTION_PROBE, transform, extents); - reflection_probe_instance_set_render_pass(rpi, RSG::rasterizer->get_frame_number()); + rpi->last_pass = RSG::rasterizer->get_frame_number(); } - if (p_reflections.size()) { - RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, MIN(cluster.max_reflections, (unsigned int)p_reflections.size()) * sizeof(ReflectionData), cluster.reflections, true); + if (cluster.reflection_count) { + RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections, true); } } -void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) { - uint32_t light_count = 0; +void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) { + Transform inverse_transform = p_camera_transform.affine_inverse(); + r_directional_light_count = 0; r_positional_light_count = 0; sky_scene_state.ubo.directional_light_count = 0; + Plane camera_plane(p_camera_transform.origin, -p_camera_transform.basis.get_axis(Vector3::AXIS_Z).normalized()); + + cluster.omni_light_count = 0; + cluster.spot_light_count = 0; + for (int i = 0; i < (int)p_lights.size(); i++) { - RID li = p_lights[i]; - RID base = light_instance_get_base_light(li); + LightInstance *li = light_instance_owner.getornull(p_lights[i]); + if (!li) { + continue; + } + RID base = li->light; ERR_CONTINUE(base.is_null()); @@ -6106,7 +6147,7 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const // Copy to SkyDirectionalLightData if (r_directional_light_count < sky_scene_state.max_directional_lights) { SkyDirectionalLightData &sky_light_data = sky_scene_state.directional_lights[r_directional_light_count]; - Transform light_transform = light_instance_get_base_transform(li); + Transform light_transform = li->transform; Vector3 world_direction = light_transform.basis.xform(Vector3(0, 0, 1)).normalized(); sky_light_data.direction[0] = world_direction.x; @@ -6142,9 +6183,9 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Cluster::DirectionalLightData &light_data = cluster.directional_lights[r_directional_light_count]; - Transform light_transform = light_instance_get_base_transform(li); + Transform light_transform = li->transform; - Vector3 direction = p_camera_inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, 1))).normalized(); + Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, 1))).normalized(); light_data.direction[0] = direction.x; light_data.direction[1] = direction.y; @@ -6223,28 +6264,28 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const int limit = smode == RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL ? 0 : (smode == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS ? 1 : 3); light_data.blend_splits = storage->light_directional_get_blend_splits(base); for (int j = 0; j < 4; j++) { - Rect2 atlas_rect = light_instance_get_directional_shadow_atlas_rect(li, j); - CameraMatrix matrix = light_instance_get_shadow_camera(li, j); - float split = light_instance_get_directional_shadow_split(li, MIN(limit, j)); + Rect2 atlas_rect = li->shadow_transform[j].atlas_rect; + CameraMatrix matrix = li->shadow_transform[j].camera; + float split = li->shadow_transform[MIN(limit, j)].split; CameraMatrix bias; bias.set_light_bias(); CameraMatrix rectm; rectm.set_light_atlas_rect(atlas_rect); - Transform modelview = (p_camera_inverse_transform * light_instance_get_shadow_transform(li, j)).inverse(); + Transform modelview = (inverse_transform * li->shadow_transform[j].transform).inverse(); CameraMatrix shadow_mtx = rectm * bias * matrix * modelview; light_data.shadow_split_offsets[j] = split; - float bias_scale = light_instance_get_shadow_bias_scale(li, j); + float bias_scale = li->shadow_transform[j].bias_scale; light_data.shadow_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * bias_scale; - light_data.shadow_normal_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * light_instance_get_directional_shadow_texel_size(li, j); + light_data.shadow_normal_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * li->shadow_transform[j].shadow_texel_size; light_data.shadow_transmittance_bias[j] = storage->light_get_transmittance_bias(base) * bias_scale; - light_data.shadow_z_range[j] = light_instance_get_shadow_range(li, j); - light_data.shadow_range_begin[j] = light_instance_get_shadow_range_begin(li, j); + light_data.shadow_z_range[j] = li->shadow_transform[j].farplane; + light_data.shadow_range_begin[j] = li->shadow_transform[j].range_begin; RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrices[j]); - Vector2 uv_scale = light_instance_get_shadow_uv_scale(li, j); + Vector2 uv_scale = li->shadow_transform[j].uv_scale; uv_scale *= atlas_rect.size; //adapt to atlas size switch (j) { case 0: { @@ -6281,162 +6322,198 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const r_directional_light_count++; } break; - case RS::LIGHT_SPOT: case RS::LIGHT_OMNI: { - if (light_count >= cluster.max_lights) { + if (cluster.omni_light_count >= cluster.max_lights) { continue; } - Transform light_transform = light_instance_get_base_transform(li); + cluster.omni_light_sort[cluster.omni_light_count].instance = li; + cluster.omni_light_sort[cluster.omni_light_count].depth = camera_plane.distance_to(li->transform.origin); + cluster.omni_light_count++; + } break; + case RS::LIGHT_SPOT: { + if (cluster.spot_light_count >= cluster.max_lights) { + continue; + } - Cluster::LightData &light_data = cluster.lights[light_count]; - cluster.lights_instances[light_count] = li; + cluster.spot_light_sort[cluster.spot_light_count].instance = li; + cluster.spot_light_sort[cluster.spot_light_count].depth = camera_plane.distance_to(li->transform.origin); + cluster.spot_light_count++; + } break; + } - float sign = storage->light_is_negative(base) ? -1 : 1; - Color linear_col = storage->light_get_color(base).to_linear(); + li->last_pass = RSG::rasterizer->get_frame_number(); + } - light_data.attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION); + if (cluster.omni_light_count) { + SortArray<Cluster::InstanceSort<LightInstance>> sorter; + sorter.sort(cluster.omni_light_sort, cluster.omni_light_count); + } - float energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI; + if (cluster.spot_light_count) { + SortArray<Cluster::InstanceSort<LightInstance>> sorter; + sorter.sort(cluster.spot_light_sort, cluster.spot_light_count); + } - light_data.color[0] = linear_col.r * energy; - light_data.color[1] = linear_col.g * energy; - light_data.color[2] = linear_col.b * energy; - light_data.specular_amount = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR); + ShadowAtlas *shadow_atlas = nullptr; - float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE)); - light_data.inv_radius = 1.0 / radius; + if (p_shadow_atlas.is_valid() && p_using_shadows) { + shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + } - Vector3 pos = p_camera_inverse_transform.xform(light_transform.origin); + for (uint32_t i = 0; i < (cluster.omni_light_count + cluster.spot_light_count); i++) { + uint32_t index = (i < cluster.omni_light_count) ? i : i - (cluster.omni_light_count); + Cluster::LightData &light_data = (i < cluster.omni_light_count) ? cluster.omni_lights[index] : cluster.spot_lights[index]; + RS::LightType type = (i < cluster.omni_light_count) ? RS::LIGHT_OMNI : RS::LIGHT_SPOT; + LightInstance *li = (i < cluster.omni_light_count) ? cluster.omni_light_sort[index].instance : cluster.spot_light_sort[index].instance; + RID base = li->light; - light_data.position[0] = pos.x; - light_data.position[1] = pos.y; - light_data.position[2] = pos.z; + cluster.lights_instances[i] = li->self; - Vector3 direction = p_camera_inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, -1))).normalized(); + Transform light_transform = li->transform; - light_data.direction[0] = direction.x; - light_data.direction[1] = direction.y; - light_data.direction[2] = direction.z; + float sign = storage->light_is_negative(base) ? -1 : 1; + Color linear_col = storage->light_get_color(base).to_linear(); - float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); + light_data.attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION); - light_data.size = size; + float energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI; - light_data.cone_attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION); - float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE); - light_data.cone_angle = Math::cos(Math::deg2rad(spot_angle)); + light_data.color[0] = linear_col.r * energy; + light_data.color[1] = linear_col.g * energy; + light_data.color[2] = linear_col.b * energy; + light_data.specular_amount = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR) * 2.0; - light_data.mask = storage->light_get_cull_mask(base); + float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE)); + light_data.inv_radius = 1.0 / radius; - light_data.atlas_rect[0] = 0; - light_data.atlas_rect[1] = 0; - light_data.atlas_rect[2] = 0; - light_data.atlas_rect[3] = 0; + Vector3 pos = inverse_transform.xform(light_transform.origin); - RID projector = storage->light_get_projector(base); + light_data.position[0] = pos.x; + light_data.position[1] = pos.y; + light_data.position[2] = pos.z; - if (projector.is_valid()) { - Rect2 rect = storage->decal_atlas_get_texture_rect(projector); + Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, -1))).normalized(); - if (type == RS::LIGHT_SPOT) { - light_data.projector_rect[0] = rect.position.x; - light_data.projector_rect[1] = rect.position.y + rect.size.height; //flip because shadow is flipped - light_data.projector_rect[2] = rect.size.width; - light_data.projector_rect[3] = -rect.size.height; - } else { - light_data.projector_rect[0] = rect.position.x; - light_data.projector_rect[1] = rect.position.y; - light_data.projector_rect[2] = rect.size.width; - light_data.projector_rect[3] = rect.size.height * 0.5; //used by dp, so needs to be half - } - } else { - light_data.projector_rect[0] = 0; - light_data.projector_rect[1] = 0; - light_data.projector_rect[2] = 0; - light_data.projector_rect[3] = 0; - } + light_data.direction[0] = direction.x; + light_data.direction[1] = direction.y; + light_data.direction[2] = direction.z; - if (p_using_shadows && p_shadow_atlas.is_valid() && shadow_atlas_owns_light_instance(p_shadow_atlas, li)) { - // fill in the shadow information + float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); - light_data.shadow_enabled = true; + light_data.size = size; - if (type == RS::LIGHT_SPOT) { - light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0); - float shadow_texel_size = Math::tan(Math::deg2rad(spot_angle)) * radius * 2.0; - shadow_texel_size *= light_instance_get_shadow_texel_size(li, p_shadow_atlas); + light_data.cone_attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION); + float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE); + light_data.cone_angle = Math::cos(Math::deg2rad(spot_angle)); - light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size; + light_data.mask = storage->light_get_cull_mask(base); - } else { //omni - light_data.shadow_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0; - float shadow_texel_size = light_instance_get_shadow_texel_size(li, p_shadow_atlas); - light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size * 2.0; // applied in -1 .. 1 space - } + light_data.atlas_rect[0] = 0; + light_data.atlas_rect[1] = 0; + light_data.atlas_rect[2] = 0; + light_data.atlas_rect[3] = 0; - light_data.transmittance_bias = storage->light_get_transmittance_bias(base); + RID projector = storage->light_get_projector(base); - Rect2 rect = light_instance_get_shadow_atlas_rect(li, p_shadow_atlas); + if (projector.is_valid()) { + Rect2 rect = storage->decal_atlas_get_texture_rect(projector); - light_data.atlas_rect[0] = rect.position.x; - light_data.atlas_rect[1] = rect.position.y; - light_data.atlas_rect[2] = rect.size.width; - light_data.atlas_rect[3] = rect.size.height; + if (type == RS::LIGHT_SPOT) { + light_data.projector_rect[0] = rect.position.x; + light_data.projector_rect[1] = rect.position.y + rect.size.height; //flip because shadow is flipped + light_data.projector_rect[2] = rect.size.width; + light_data.projector_rect[3] = -rect.size.height; + } else { + light_data.projector_rect[0] = rect.position.x; + light_data.projector_rect[1] = rect.position.y; + light_data.projector_rect[2] = rect.size.width; + light_data.projector_rect[3] = rect.size.height * 0.5; //used by dp, so needs to be half + } + } else { + light_data.projector_rect[0] = 0; + light_data.projector_rect[1] = 0; + light_data.projector_rect[2] = 0; + light_data.projector_rect[3] = 0; + } - light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR); - light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base); + if (shadow_atlas && shadow_atlas->shadow_owners.has(li->self)) { + // fill in the shadow information - if (type == RS::LIGHT_OMNI) { - light_data.atlas_rect[3] *= 0.5; //one paraboloid on top of another - Transform proj = (p_camera_inverse_transform * light_transform).inverse(); + light_data.shadow_enabled = true; - RendererStorageRD::store_transform(proj, light_data.shadow_matrix); + if (type == RS::LIGHT_SPOT) { + light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0); + float shadow_texel_size = Math::tan(Math::deg2rad(spot_angle)) * radius * 2.0; + shadow_texel_size *= light_instance_get_shadow_texel_size(li->self, p_shadow_atlas); - if (size > 0.0) { - light_data.soft_shadow_size = size; - } else { - light_data.soft_shadow_size = 0.0; - light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF - } + light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size; - } else if (type == RS::LIGHT_SPOT) { - Transform modelview = (p_camera_inverse_transform * light_transform).inverse(); - CameraMatrix bias; - bias.set_light_bias(); + } else { //omni + light_data.shadow_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0; + float shadow_texel_size = light_instance_get_shadow_texel_size(li->self, p_shadow_atlas); + light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size * 2.0; // applied in -1 .. 1 space + } - CameraMatrix shadow_mtx = bias * light_instance_get_shadow_camera(li, 0) * modelview; - RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrix); + light_data.transmittance_bias = storage->light_get_transmittance_bias(base); - if (size > 0.0) { - CameraMatrix cm = light_instance_get_shadow_camera(li, 0); - float half_np = cm.get_z_near() * Math::tan(Math::deg2rad(spot_angle)); - light_data.soft_shadow_size = (size * 0.5 / radius) / (half_np / cm.get_z_near()) * rect.size.width; - } else { - light_data.soft_shadow_size = 0.0; - light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF - } - } + Rect2 rect = light_instance_get_shadow_atlas_rect(li->self, p_shadow_atlas); + + light_data.atlas_rect[0] = rect.position.x; + light_data.atlas_rect[1] = rect.position.y; + light_data.atlas_rect[2] = rect.size.width; + light_data.atlas_rect[3] = rect.size.height; + + light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR); + light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base); + + if (type == RS::LIGHT_OMNI) { + light_data.atlas_rect[3] *= 0.5; //one paraboloid on top of another + Transform proj = (inverse_transform * light_transform).inverse(); + + RendererStorageRD::store_transform(proj, light_data.shadow_matrix); + + if (size > 0.0) { + light_data.soft_shadow_size = size; } else { - light_data.shadow_enabled = false; + light_data.soft_shadow_size = 0.0; + light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF } - light_instance_set_index(li, light_count); + } else if (type == RS::LIGHT_SPOT) { + Transform modelview = (inverse_transform * light_transform).inverse(); + CameraMatrix bias; + bias.set_light_bias(); - cluster.builder.add_light(type == RS::LIGHT_SPOT ? LightClusterBuilder::LIGHT_TYPE_SPOT : LightClusterBuilder::LIGHT_TYPE_OMNI, light_transform, radius, spot_angle); + CameraMatrix shadow_mtx = bias * li->shadow_transform[0].camera * modelview; + RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrix); - light_count++; - r_positional_light_count++; - } break; + if (size > 0.0) { + CameraMatrix cm = li->shadow_transform[0].camera; + float half_np = cm.get_z_near() * Math::tan(Math::deg2rad(spot_angle)); + light_data.soft_shadow_size = (size * 0.5 / radius) / (half_np / cm.get_z_near()) * rect.size.width; + } else { + light_data.soft_shadow_size = 0.0; + light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF + } + } + } else { + light_data.shadow_enabled = false; } - light_instance_set_render_pass(li, RSG::rasterizer->get_frame_number()); + li->light_index = index; + + current_cluster_builder->add_light(type == RS::LIGHT_SPOT ? ClusterBuilderRD::LIGHT_TYPE_SPOT : ClusterBuilderRD::LIGHT_TYPE_OMNI, light_transform, radius, spot_angle); - //update UBO for forward rendering, blit to texture for clustered + r_positional_light_count++; } - if (light_count) { - RD::get_singleton()->buffer_update(cluster.light_buffer, 0, sizeof(Cluster::LightData) * light_count, cluster.lights, true); + if (cluster.omni_light_count) { + RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights, true); + } + + if (cluster.spot_light_count) { + RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights, true); } if (r_directional_light_count) { @@ -6449,18 +6526,26 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const uv_xform.basis.scale(Vector3(2.0, 1.0, 2.0)); uv_xform.origin = Vector3(-1.0, 0.0, -1.0); - uint32_t decal_count = MIN((uint32_t)p_decals.size(), cluster.max_decals); - int idx = 0; + uint32_t decal_count = p_decals.size(); + + cluster.decal_count = 0; + for (uint32_t i = 0; i < decal_count; i++) { - RID di = p_decals[i]; - RID decal = decal_instance_get_base(di); + if (cluster.decal_count == cluster.max_decals) { + break; + } + + DecalInstance *di = decal_instance_owner.getornull(p_decals[i]); + if (!di) { + continue; + } + RID decal = di->decal; - Transform xform = decal_instance_get_transform(di); + Transform xform = di->transform; - float fade = 1.0; + real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; if (storage->decal_is_distance_fade_enabled(decal)) { - real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; float fade_begin = storage->decal_get_distance_fade_begin(decal); float fade_length = storage->decal_get_distance_fade_length(decal); @@ -6468,18 +6553,43 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const if (distance > fade_begin + fade_length) { continue; // do not use this decal, its invisible } + } + } + cluster.decal_sort[cluster.decal_count].instance = di; + cluster.decal_sort[cluster.decal_count].depth = distance; + cluster.decal_count++; + } + + if (cluster.decal_count > 0) { + SortArray<Cluster::InstanceSort<DecalInstance>> sort_array; + sort_array.sort(cluster.decal_sort, cluster.decal_count); + } + + for (uint32_t i = 0; i < cluster.decal_count; i++) { + DecalInstance *di = cluster.decal_sort[i].instance; + RID decal = di->decal; + + Transform xform = di->transform; + float fade = 1.0; + + if (storage->decal_is_distance_fade_enabled(decal)) { + real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; + float fade_begin = storage->decal_get_distance_fade_begin(decal); + float fade_length = storage->decal_get_distance_fade_length(decal); + + if (distance > fade_begin) { fade = 1.0 - (distance - fade_begin) / fade_length; } } - Cluster::DecalData &dd = cluster.decals[idx]; + Cluster::DecalData &dd = cluster.decals[i]; Vector3 decal_extents = storage->decal_get_extents(decal); Transform scale_xform; scale_xform.basis.scale(Vector3(decal_extents.x, decal_extents.y, decal_extents.z)); - Transform to_decal_xform = (p_camera_inverse_xform * decal_instance_get_transform(di) * scale_xform * uv_xform).affine_inverse(); + Transform to_decal_xform = (p_camera_inverse_xform * di->transform * scale_xform * uv_xform).affine_inverse(); RendererStorageRD::store_transform(to_decal_xform, dd.xform); Vector3 normal = xform.basis.get_axis(Vector3::AXIS_Y).normalized(); @@ -6564,13 +6674,11 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const dd.upper_fade = storage->decal_get_upper_fade(decal); dd.lower_fade = storage->decal_get_lower_fade(decal); - cluster.builder.add_decal(xform, decal_extents); - - idx++; + current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_DECAL, xform, decal_extents); } - if (idx > 0) { - RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * idx, cluster.decals, true); + if (cluster.decal_count > 0) { + RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals, true); } } @@ -6753,8 +6861,10 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e cluster.lights_shadow_rect_cache_count = 0; - for (int i = 0; i < p_positional_light_count; i++) { - if (cluster.lights[i].shadow_enabled != 0) { + for (uint32_t i = 0; i < cluster.omni_light_count + cluster.spot_light_count; i++) { + Cluster::LightData &ld = i < cluster.omni_light_count ? cluster.omni_lights[i] : cluster.spot_lights[i - cluster.omni_light_count]; + + if (ld.shadow_enabled != 0) { RID li = cluster.lights_instances[i]; ERR_CONTINUE(!shadow_atlas->shadow_owners.has(li)); @@ -6792,7 +6902,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e cluster.lights_shadow_rect_cache_count++; - if (cluster.lights_shadow_rect_cache_count == cluster.max_lights) { + if (cluster.lights_shadow_rect_cache_count == cluster.max_lights * 2) { break; //light limit reached } } @@ -6889,23 +6999,22 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 3; - u.ids.push_back(get_positional_light_buffer()); + u.ids.push_back(get_omni_light_buffer()); uniforms.push_back(u); } - { RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 4; - u.ids.push_back(get_directional_light_buffer()); + u.ids.push_back(get_spot_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.binding = 5; - u.ids.push_back(get_cluster_builder_texture()); + u.ids.push_back(get_directional_light_buffer()); uniforms.push_back(u); } @@ -6913,7 +7022,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 6; - u.ids.push_back(get_cluster_builder_indices_buffer()); + u.ids.push_back(rb->cluster_builder->get_cluster_buffer()); uniforms.push_back(u); } @@ -6973,6 +7082,13 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); uniforms.push_back(u); } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 14; + u.ids.push_back(volumetric_fog.params_ubo); + uniforms.push_back(u); + } rb->volumetric_fog->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, 0), 0); @@ -7018,7 +7134,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e rb->volumetric_fog->length = env->volumetric_fog_length; rb->volumetric_fog->spread = env->volumetric_fog_detail_spread; - VolumetricFogShader::PushConstant push_constant; + VolumetricFogShader::ParamsUBO params; Vector2 frustum_near_size = p_cam_projection.get_viewport_half_extents(); Vector2 frustum_far_size = p_cam_projection.get_far_plane_half_extents(); @@ -7034,51 +7150,71 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e fog_near_size = Vector2(); } - push_constant.fog_frustum_size_begin[0] = fog_near_size.x; - push_constant.fog_frustum_size_begin[1] = fog_near_size.y; + params.fog_frustum_size_begin[0] = fog_near_size.x; + params.fog_frustum_size_begin[1] = fog_near_size.y; - push_constant.fog_frustum_size_end[0] = fog_far_size.x; - push_constant.fog_frustum_size_end[1] = fog_far_size.y; + params.fog_frustum_size_end[0] = fog_far_size.x; + params.fog_frustum_size_end[1] = fog_far_size.y; - push_constant.z_near = z_near; - push_constant.z_far = z_far; + params.z_near = z_near; + params.z_far = z_far; - push_constant.fog_frustum_end = fog_end; + params.fog_frustum_end = fog_end; - push_constant.fog_volume_size[0] = rb->volumetric_fog->width; - push_constant.fog_volume_size[1] = rb->volumetric_fog->height; - push_constant.fog_volume_size[2] = rb->volumetric_fog->depth; + params.fog_volume_size[0] = rb->volumetric_fog->width; + params.fog_volume_size[1] = rb->volumetric_fog->height; + params.fog_volume_size[2] = rb->volumetric_fog->depth; - push_constant.directional_light_count = p_directional_light_count; + params.directional_light_count = p_directional_light_count; Color light = env->volumetric_fog_light.to_linear(); - push_constant.light_energy[0] = light.r * env->volumetric_fog_light_energy; - push_constant.light_energy[1] = light.g * env->volumetric_fog_light_energy; - push_constant.light_energy[2] = light.b * env->volumetric_fog_light_energy; - push_constant.base_density = env->volumetric_fog_density; + params.light_energy[0] = light.r * env->volumetric_fog_light_energy; + params.light_energy[1] = light.g * env->volumetric_fog_light_energy; + params.light_energy[2] = light.b * env->volumetric_fog_light_energy; + params.base_density = env->volumetric_fog_density; + + params.detail_spread = env->volumetric_fog_detail_spread; + params.gi_inject = env->volumetric_fog_gi_inject; + + params.cam_rotation[0] = p_cam_transform.basis[0][0]; + params.cam_rotation[1] = p_cam_transform.basis[1][0]; + params.cam_rotation[2] = p_cam_transform.basis[2][0]; + params.cam_rotation[3] = 0; + params.cam_rotation[4] = p_cam_transform.basis[0][1]; + params.cam_rotation[5] = p_cam_transform.basis[1][1]; + params.cam_rotation[6] = p_cam_transform.basis[2][1]; + params.cam_rotation[7] = 0; + params.cam_rotation[8] = p_cam_transform.basis[0][2]; + params.cam_rotation[9] = p_cam_transform.basis[1][2]; + params.cam_rotation[10] = p_cam_transform.basis[2][2]; + params.cam_rotation[11] = 0; + params.filter_axis = 0; + params.max_gi_probes = env->volumetric_fog_gi_inject > 0.001 ? p_gi_probe_count : 0; - push_constant.detail_spread = env->volumetric_fog_detail_spread; - push_constant.gi_inject = env->volumetric_fog_gi_inject; + { + uint32_t cluster_size = rb->cluster_builder->get_cluster_size(); + params.cluster_shift = get_shift_from_power_of_2(cluster_size); - push_constant.cam_rotation[0] = p_cam_transform.basis[0][0]; - push_constant.cam_rotation[1] = p_cam_transform.basis[1][0]; - push_constant.cam_rotation[2] = p_cam_transform.basis[2][0]; - push_constant.cam_rotation[3] = 0; - push_constant.cam_rotation[4] = p_cam_transform.basis[0][1]; - push_constant.cam_rotation[5] = p_cam_transform.basis[1][1]; - push_constant.cam_rotation[6] = p_cam_transform.basis[2][1]; - push_constant.cam_rotation[7] = 0; - push_constant.cam_rotation[8] = p_cam_transform.basis[0][2]; - push_constant.cam_rotation[9] = p_cam_transform.basis[1][2]; - push_constant.cam_rotation[10] = p_cam_transform.basis[2][2]; - push_constant.cam_rotation[11] = 0; - push_constant.filter_axis = 0; - push_constant.max_gi_probes = env->volumetric_fog_gi_inject > 0.001 ? p_gi_probe_count : 0; + uint32_t cluster_screen_width = (rb->width - 1) / cluster_size + 1; + uint32_t cluster_screen_height = (rb->height - 1) / cluster_size + 1; + params.cluster_type_size = cluster_screen_width * cluster_screen_height * (32 + 32); + params.cluster_width = cluster_screen_width; + params.max_cluster_element_count_div_32 = max_cluster_elements / 32; + + params.screen_size[0] = rb->width; + params.screen_size[1] = rb->height; + } /* Vector2 dssize = directional_shadow_get_size(); push_constant.directional_shadow_pixel_size[0] = 1.0 / dssize.x; push_constant.directional_shadow_pixel_size[1] = 1.0 / dssize.y; */ + + RENDER_TIMESTAMP(">Volumetric Fog"); + + RENDER_TIMESTAMP("Render Fog"); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), ¶ms, true); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); bool use_filter = volumetric_fog_filter_active; @@ -7086,38 +7222,48 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[using_sdfgi ? VOLUMETRIC_FOG_SHADER_DENSITY_WITH_SDFGI : VOLUMETRIC_FOG_SHADER_DENSITY]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); + if (using_sdfgi) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); } - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 4, 4, 4); RD::get_singleton()->compute_list_add_barrier(compute_list); if (use_filter) { + RENDER_TIMESTAMP("Filter Fog"); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->compute_list_end(); + //need restart for buffer update - push_constant.filter_axis = 1; + params.filter_axis = 1; + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), ¶ms, true); + compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set2, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); + if (using_sdfgi) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); + } RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); } + RENDER_TIMESTAMP("Integrate Fog"); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FOG]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1, 8, 8, 1); RD::get_singleton()->compute_list_end(); + + RENDER_TIMESTAMP("<Volumetric Fog"); } void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) { @@ -7150,7 +7296,24 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & gi_probes = ∅ } - cluster.builder.begin(p_cam_transform.affine_inverse(), p_cam_projection); //prepare cluster + if (render_buffers_owner.owns(p_render_buffers)) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + current_cluster_builder = rb->cluster_builder; + } else if (reflection_probe_instance_owner.owns(p_reflection_probe)) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflection_probe); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas); + if (!ra) { + ERR_PRINT("reflection probe has no reflection atlas! Bug?"); + current_cluster_builder = nullptr; + } else { + current_cluster_builder = ra->cluster_builder; + } + } else { + ERR_PRINT("No cluster builder, bug"); //should never happen, will crash + current_cluster_builder = nullptr; + } + + current_cluster_builder->begin(p_cam_transform, p_cam_projection, !p_reflection_probe.is_valid()); bool using_shadows = true; @@ -7165,12 +7328,15 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & uint32_t directional_light_count = 0; uint32_t positional_light_count = 0; - _setup_lights(*lights, p_cam_transform.affine_inverse(), p_shadow_atlas, using_shadows, directional_light_count, positional_light_count); + _setup_lights(*lights, p_cam_transform, p_shadow_atlas, using_shadows, directional_light_count, positional_light_count); _setup_decals(p_decals, p_cam_transform.affine_inverse()); - cluster.builder.bake_cluster(); //bake to cluster + + current_cluster_builder->bake_cluster(); uint32_t gi_probe_count = 0; - _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count); + if (p_render_buffers.is_valid()) { + _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count); + } if (p_render_buffers.is_valid()) { bool directional_shadows = false; @@ -7183,9 +7349,30 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & _update_volumetric_fog(p_render_buffers, p_environment, p_cam_projection, p_cam_transform, p_shadow_atlas, directional_light_count, directional_shadows, positional_light_count, gi_probe_count); } - _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); + _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); if (p_render_buffers.is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES) { + ClusterBuilderRD::ElementType elem_type = ClusterBuilderRD::ELEMENT_TYPE_MAX; + switch (debug_draw) { + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_OMNI_LIGHT; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_SPOT_LIGHT; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_DECAL; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_REFLECTION_PROBE; + break; + default: { + } + } + current_cluster_builder->debug(elem_type); + } + RENDER_TIMESTAMP("Tonemap"); _render_buffers_post_process_and_tonemap(p_render_buffers, p_environment, p_camera_effects, p_cam_projection); @@ -7846,6 +8033,9 @@ bool RendererSceneRenderRD::free(RID p_rid) { if (rb->volumetric_fog) { _volumetric_fog_erase(rb); } + if (rb->cluster_builder) { + memdelete(rb->cluster_builder); + } render_buffers_owner.free(p_rid); } else if (environment_owner.owns(p_rid)) { //not much to delete, just free it @@ -7855,6 +8045,10 @@ bool RendererSceneRenderRD::free(RID p_rid) { camera_effects_owner.free(p_rid); } else if (reflection_atlas_owner.owns(p_rid)) { reflection_atlas_set_size(p_rid, 0, 0); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(p_rid); + if (ra->cluster_builder) { + memdelete(ra->cluster_builder); + } reflection_atlas_owner.free(p_rid); } else if (reflection_probe_instance_owner.owns(p_rid)) { //not much to delete, just free it @@ -8073,20 +8267,17 @@ void RendererSceneRenderRD::sdfgi_set_debug_probe_select(const Vector3 &p_positi RendererSceneRenderRD *RendererSceneRenderRD::singleton = nullptr; -RID RendererSceneRenderRD::get_cluster_builder_texture() { - return cluster.builder.get_cluster_texture(); -} - -RID RendererSceneRenderRD::get_cluster_builder_indices_buffer() { - return cluster.builder.get_cluster_indices_buffer(); -} - RID RendererSceneRenderRD::get_reflection_probe_buffer() { return cluster.reflection_buffer; } -RID RendererSceneRenderRD::get_positional_light_buffer() { - return cluster.light_buffer; +RID RendererSceneRenderRD::get_omni_light_buffer() { + return cluster.omni_light_buffer; +} + +RID RendererSceneRenderRD::get_spot_light_buffer() { + return cluster.spot_light_buffer; } + RID RendererSceneRenderRD::get_directional_light_buffer() { return cluster.directional_light_buffer; } @@ -8102,6 +8293,8 @@ bool RendererSceneRenderRD::is_low_end() const { } RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { + max_cluster_elements = GLOBAL_GET("rendering/cluster_builder/max_clustered_elements"); + storage = p_storage; singleton = this; @@ -8436,11 +8629,15 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { sdfgi_shader.integrate_default_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); } } + //GK { //calculate tables String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; Vector<String> gi_modes; - gi_modes.push_back(""); + gi_modes.push_back("\n#define USE_GIPROBE\n"); + gi_modes.push_back("\n#define USE_SDFGI\n"); + gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_GIPROBE\n"); + gi.shader.initialize(gi_modes, defines); gi.shader_version = gi.shader.version_create(); for (int i = 0; i < GI::MODE_MAX; i++) { @@ -8484,30 +8681,29 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { default_giprobe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GI::GIProbeData) * RenderBuffers::MAX_GIPROBES); } - //cluster setup - uint32_t uniform_max_size = RD::get_singleton()->limit_get(RD::LIMIT_MAX_UNIFORM_BUFFER_SIZE); - { //reflections - uint32_t reflection_buffer_size; - if (uniform_max_size < 65536) { - reflection_buffer_size = uniform_max_size; - } else { - reflection_buffer_size = 65536; - } - cluster.max_reflections = reflection_buffer_size / sizeof(Cluster::ReflectionData); + cluster.max_reflections = max_cluster_elements; cluster.reflections = memnew_arr(Cluster::ReflectionData, cluster.max_reflections); - cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(reflection_buffer_size); + cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_decals); + cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(sizeof(Cluster::ReflectionData) * cluster.max_reflections); } { //lights - cluster.max_lights = MIN(1024 * 1024, uniform_max_size) / sizeof(Cluster::LightData); //1mb of lights + cluster.max_lights = max_cluster_elements; + uint32_t light_buffer_size = cluster.max_lights * sizeof(Cluster::LightData); - cluster.lights = memnew_arr(Cluster::LightData, cluster.max_lights); - cluster.light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.omni_lights = memnew_arr(Cluster::LightData, cluster.max_lights); + cluster.omni_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.omni_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights); + cluster.spot_lights = memnew_arr(Cluster::LightData, cluster.max_lights); + cluster.spot_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.spot_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights); //defines += "\n#define MAX_LIGHT_DATA_STRUCTS " + itos(cluster.max_lights) + "\n"; - cluster.lights_instances = memnew_arr(RID, cluster.max_lights); - cluster.lights_shadow_rect_cache = memnew_arr(Rect2i, cluster.max_lights); + + //used for volumetric fog shrinking + cluster.lights_instances = memnew_arr(RID, cluster.max_lights * 2); + cluster.lights_shadow_rect_cache = memnew_arr(Rect2i, cluster.max_lights * 2); cluster.max_directional_lights = MAX_DIRECTIONAL_LIGHTS; uint32_t directional_light_buffer_size = cluster.max_directional_lights * sizeof(Cluster::DirectionalLightData); @@ -8516,14 +8712,13 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { } { //decals - cluster.max_decals = MIN(1024 * 1024, uniform_max_size) / sizeof(Cluster::DecalData); //1mb of decals + cluster.max_decals = max_cluster_elements; uint32_t decal_buffer_size = cluster.max_decals * sizeof(Cluster::DecalData); cluster.decals = memnew_arr(Cluster::DecalData, cluster.max_decals); + cluster.decal_sort = memnew_arr(Cluster::InstanceSort<DecalInstance>, cluster.max_decals); cluster.decal_buffer = RD::get_singleton()->storage_buffer_create(decal_buffer_size); } - cluster.builder.setup(16, 8, 24); - if (!low_end) { String defines = "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(cluster.max_directional_lights) + "\n"; Vector<String> volumetric_fog_modes; @@ -8536,6 +8731,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { for (int i = 0; i < VOLUMETRIC_FOG_SHADER_MAX; i++) { volumetric_fog.pipelines[i] = RD::get_singleton()->compute_pipeline_create(volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, i)); } + volumetric_fog.params_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(VolumetricFogShader::ParamsUBO)); } { @@ -8601,6 +8797,7 @@ RendererSceneRenderRD::~RendererSceneRenderRD() { sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); volumetric_fog.shader.version_free(volumetric_fog.shader_version); + RD::get_singleton()->free(volumetric_fog.params_ubo); memdelete_arr(gi_probe_lights); } @@ -8622,15 +8819,21 @@ RendererSceneRenderRD::~RendererSceneRenderRD() { { RD::get_singleton()->free(cluster.directional_light_buffer); - RD::get_singleton()->free(cluster.light_buffer); + RD::get_singleton()->free(cluster.omni_light_buffer); + RD::get_singleton()->free(cluster.spot_light_buffer); RD::get_singleton()->free(cluster.reflection_buffer); RD::get_singleton()->free(cluster.decal_buffer); memdelete_arr(cluster.directional_lights); - memdelete_arr(cluster.lights); + memdelete_arr(cluster.omni_lights); + memdelete_arr(cluster.spot_lights); + memdelete_arr(cluster.omni_light_sort); + memdelete_arr(cluster.spot_light_sort); memdelete_arr(cluster.lights_shadow_rect_cache); memdelete_arr(cluster.lights_instances); memdelete_arr(cluster.reflections); + memdelete_arr(cluster.reflection_sort); memdelete_arr(cluster.decals); + memdelete_arr(cluster.decal_sort); } RD::get_singleton()->free(shadow_sampler); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index 264434a301..3e69335225 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -34,7 +34,7 @@ #include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" #include "servers/rendering/renderer_compositor.h" -#include "servers/rendering/renderer_rd/light_cluster_builder.h" +#include "servers/rendering/renderer_rd/cluster_builder_rd.h" #include "servers/rendering/renderer_rd/renderer_storage_rd.h" #include "servers/rendering/renderer_rd/shaders/gi.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/giprobe.glsl.gen.h" @@ -104,12 +104,12 @@ protected: }; virtual RenderBufferData *_create_render_buffer_data() = 0; - void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count); + void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count); void _setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform); void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment); void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used); - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) = 0; virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; @@ -341,6 +341,8 @@ private: }; Vector<Reflection> reflections; + + ClusterBuilderRD *cluster_builder = nullptr; }; mutable RID_Owner<ReflectionAtlas> reflection_atlas_owner; @@ -833,6 +835,9 @@ private: /* RENDER BUFFERS */ + ClusterBuilderSharedDataRD cluster_builder_shared; + ClusterBuilderRD *current_cluster_builder = nullptr; + struct SDFGI; struct VolumetricFog; @@ -858,6 +863,8 @@ private: SDFGI *sdfgi = nullptr; VolumetricFog *volumetric_fog = nullptr; + ClusterBuilderRD *cluster_builder = nullptr; + //built-in textures used for ping pong image processing and blurring struct Blur { RID texture; @@ -1259,7 +1266,7 @@ private: uint32_t max_giprobes; uint32_t high_quality_vct; - uint32_t use_sdfgi; + uint32_t pad2; uint32_t orthogonal; float ao_color[3]; @@ -1269,8 +1276,11 @@ private: }; RID sdfgi_ubo; - enum { - MODE_MAX = 1 + enum Mode { + MODE_GIPROBE, + MODE_SDFGI, + MODE_COMBINED, + MODE_MAX }; GiShaderRD shader; @@ -1394,18 +1404,39 @@ private: float normal_fade; }; + template <class T> + struct InstanceSort { + float depth; + T *instance; + bool operator<(const InstanceSort &p_sort) const { + return depth < p_sort.depth; + } + }; + ReflectionData *reflections; + InstanceSort<ReflectionProbeInstance> *reflection_sort; uint32_t max_reflections; RID reflection_buffer; uint32_t max_reflection_probes_per_instance; + uint32_t reflection_count = 0; DecalData *decals; + InstanceSort<DecalInstance> *decal_sort; uint32_t max_decals; RID decal_buffer; + uint32_t decal_count; - LightData *lights; + LightData *omni_lights; + LightData *spot_lights; + + InstanceSort<LightInstance> *omni_light_sort; + InstanceSort<LightInstance> *spot_light_sort; uint32_t max_lights; - RID light_buffer; + RID omni_light_buffer; + RID spot_light_buffer; + uint32_t omni_light_count = 0; + uint32_t spot_light_count = 0; + RID *lights_instances; Rect2i *lights_shadow_rect_cache; uint32_t lights_shadow_rect_cache_count = 0; @@ -1414,8 +1445,6 @@ private: uint32_t max_directional_lights; RID directional_light_buffer; - LightClusterBuilder builder; - } cluster; struct VolumetricFog { @@ -1445,7 +1474,7 @@ private: }; struct VolumetricFogShader { - struct PushConstant { + struct ParamsUBO { float fog_frustum_size_begin[2]; float fog_frustum_size_end[2]; @@ -1463,13 +1492,21 @@ private: float detail_spread; float gi_inject; uint32_t max_gi_probes; - uint32_t pad; + uint32_t cluster_type_size; + + float screen_size[2]; + uint32_t cluster_shift; + uint32_t cluster_width; + + uint32_t cluster_pad[3]; + uint32_t max_cluster_element_count_div_32; float cam_rotation[12]; }; VolumetricFogShaderRD shader; + RID params_ubo; RID shader_version; RID pipelines[VOLUMETRIC_FOG_SHADER_MAX]; @@ -1494,6 +1531,7 @@ private: float weight; }; + uint32_t max_cluster_elements = 512; bool low_end = false; public: @@ -2002,10 +2040,9 @@ public: virtual void set_time(double p_time, double p_step); - RID get_cluster_builder_texture(); - RID get_cluster_builder_indices_buffer(); RID get_reflection_probe_buffer(); - RID get_positional_light_buffer(); + RID get_omni_light_buffer(); + RID get_spot_light_buffer(); RID get_directional_light_buffer(); RID get_decal_buffer(); int get_max_directional_lights() const; diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index b74a1083e7..6203f3ba64 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -7340,6 +7340,7 @@ void RendererStorageRD::_update_decal_atlas() { tformat.shareable_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB); decal_atlas.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + RD::get_singleton()->texture_clear(decal_atlas.texture, Color(0, 0, 0, 0), 0, decal_atlas.mipmaps, 0, 1, true); { //create the framebuffer diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h index 5ef73f0db8..2fb66ac573 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.h +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ -95,6 +95,21 @@ public: p_array[11] = 0; } + static _FORCE_INLINE_ void store_transform_transposed_3x4(const Transform &p_mtx, float *p_array) { + p_array[0] = p_mtx.basis.elements[0][0]; + p_array[1] = p_mtx.basis.elements[0][1]; + p_array[2] = p_mtx.basis.elements[0][2]; + p_array[3] = p_mtx.origin.x; + p_array[4] = p_mtx.basis.elements[1][0]; + p_array[5] = p_mtx.basis.elements[1][1]; + p_array[6] = p_mtx.basis.elements[1][2]; + p_array[7] = p_mtx.origin.y; + p_array[8] = p_mtx.basis.elements[2][0]; + p_array[9] = p_mtx.basis.elements[2][1]; + p_array[10] = p_mtx.basis.elements[2][2]; + p_array[11] = p_mtx.origin.z; + } + static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) { for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index deaa9668df..1b0197c1c1 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -44,3 +44,6 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("particles_copy.glsl") env.RD_GLSL("sort.glsl") env.RD_GLSL("skeleton.glsl") + env.RD_GLSL("cluster_render.glsl") + env.RD_GLSL("cluster_store.glsl") + env.RD_GLSL("cluster_debug.glsl") diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl new file mode 100644 index 0000000000..70a875192c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl @@ -0,0 +1,115 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32 + vec3(0.14, 0.17, 0.23), + vec3(0.24, 0.44, 0.83), + vec3(0.23, 0.57, 0.84), + vec3(0.22, 0.71, 0.84), + vec3(0.22, 0.85, 0.83), + vec3(0.21, 0.85, 0.72), + vec3(0.21, 0.85, 0.57), + vec3(0.20, 0.85, 0.42), + vec3(0.20, 0.85, 0.27), + vec3(0.27, 0.86, 0.19), + vec3(0.51, 0.85, 0.19), + vec3(0.57, 0.86, 0.19), + vec3(0.62, 0.85, 0.19), + vec3(0.67, 0.86, 0.20), + vec3(0.73, 0.85, 0.20), + vec3(0.78, 0.85, 0.20), + vec3(0.83, 0.85, 0.20), + vec3(0.85, 0.82, 0.20), + vec3(0.85, 0.76, 0.20), + vec3(0.85, 0.81, 0.20), + vec3(0.85, 0.65, 0.20), + vec3(0.84, 0.60, 0.21), + vec3(0.84, 0.56, 0.21), + vec3(0.84, 0.51, 0.21), + vec3(0.84, 0.46, 0.21), + vec3(0.84, 0.41, 0.21), + vec3(0.84, 0.36, 0.21), + vec3(0.84, 0.31, 0.21), + vec3(0.84, 0.27, 0.21), + vec3(0.83, 0.22, 0.22), + vec3(0.83, 0.22, 0.27), + vec3(0.83, 0.22, 0.32), + vec3(1.00, 0.63, 0.70)); +layout(push_constant, binding = 0, std430) uniform Params { + uvec2 screen_size; + uvec2 cluster_screen_size; + + uint cluster_shift; + uint cluster_type; + float z_near; + float z_far; + + bool orthogonal; + uint max_cluster_element_count_div_32; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterData { + uint data[]; +} +cluster_data; + +layout(rgba16f, set = 0, binding = 2) uniform restrict writeonly image2D screen_buffer; +layout(set = 0, binding = 3) uniform texture2D depth_buffer; +layout(set = 0, binding = 4) uniform sampler depth_buffer_sampler; + +void main() { + uvec2 screen_pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(screen_pos, params.screen_size))) { + return; + } + + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + + uint offset = cluster_pos.y * params.cluster_screen_size.x + cluster_pos.x; + offset += params.cluster_screen_size.x * params.cluster_screen_size.y * params.cluster_type; + offset *= (params.max_cluster_element_count_div_32 + 32); + + //depth buffers generally can't be accessed via image API + float depth = texelFetch(sampler2D(depth_buffer, depth_buffer_sampler), ivec2(screen_pos), 0).r * 2.0 - 1.0; + + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + depth /= params.z_far; + + uint slice = uint(clamp(floor(depth * 32.0), 0.0, 31.0)); + uint slice_minmax = cluster_data.data[offset + params.max_cluster_element_count_div_32 + slice]; + uint item_min = slice_minmax & 0xFFFF; + uint item_max = slice_minmax >> 16; + + uint item_count = 0; + for (uint i = 0; i < params.max_cluster_element_count_div_32; i++) { + uint slice_bits = cluster_data.data[offset + i]; + while (slice_bits != 0) { + uint bit = findLSB(slice_bits); + uint item = i * 32 + bit; + if ((item >= item_min && item < item_max)) { + item_count++; + } + slice_bits &= ~(1 << bit); + } + } + + item_count = min(item_count, 32); + + vec3 color = usage_gradient[item_count]; + + color = mix(color * 1.2, color * 0.3, float(slice) / 31.0); + + imageStore(screen_buffer, ivec2(screen_pos), vec4(color, 1.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl new file mode 100644 index 0000000000..8723ea78e4 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -0,0 +1,168 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec3 vertex_attrib; + +layout(location = 0) out float depth_interp; +layout(location = 1) out flat uint element_index; + +layout(push_constant, binding = 0, std430) uniform Params { + uint base_index; + uint pad0; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + element_index = params.base_index + gl_InstanceIndex; + + vec3 vertex = vertex_attrib; + vertex *= render_elements.data[element_index].scale; + + vertex = vec4(vertex, 1.0) * render_elements.data[element_index].transform_inv; + depth_interp = -vertex.z; + + gl_Position = state.projection * vec4(vertex, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_vote) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable +#extension GL_KHR_shader_subgroup_vote : enable + +#define USE_SUBGROUPS +#endif + +layout(location = 0) in float depth_interp; +layout(location = 1) in flat uint element_index; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +//cluster data is layout linearly, each cell contains the follow information: +// - list of bits for every element to mark as used, so (max_elem_count/32)*4 uints +// - a uint for each element to mark the depth bits used when rendering (0-31) + +layout(set = 0, binding = 3, std430) buffer restrict ClusterRender { + uint data[]; +} +cluster_render; + +void main() { + //convert from screen to cluster + uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift; + + //get linear cluster offset from screen poss + uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y; + //multiply by data size to position at the beginning of the element list for this cluster + cluster_offset *= state.cluster_data_size; + + //find the current element in the list and plot the bit to mark it as used + uint usage_write_offset = cluster_offset + (element_index >> 5); + uint usage_write_bit = 1 << (element_index & 0x1F); + +#ifdef USE_SUBGROUPS + + uint cluster_thread_group_index; + + if (!gl_HelperInvocation) { + //http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf + + uvec4 mask; + + while (true) { + // find the cluster offset of the first active thread + // threads that did break; go inactive and no longer count + uint first = subgroupBroadcastFirst(cluster_offset); + // update the mask for thread that match this cluster + mask = subgroupBallot(first == cluster_offset); + if (first == cluster_offset) { + // This thread belongs to the group of threads that match this offset, + // so exit the loop. + break; + } + } + + cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask); + + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } +#endif + //find the current element in the depth usage list and mark the current depth as used + float unit_depth = depth_interp * state.inv_z_far; + + uint z_bit = clamp(uint(floor(unit_depth * 32.0)), 0, 31); + + uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index; + uint z_write_bit = 1 << z_bit; + +#ifdef USE_SUBGROUPS + if (!gl_HelperInvocation) { + z_write_bit = subgroupOr(z_write_bit); //merge all Zs + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl new file mode 100644 index 0000000000..5be0893c4f --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl @@ -0,0 +1,119 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 0, std430) uniform Params { + uint cluster_render_data_size; // how much data for a single cluster takes + uint max_render_element_count_div_32; //divided by 32 + uvec2 cluster_screen_size; + uint render_element_count_div_32; //divided by 32 + + uint max_cluster_element_count_div_32; //divided by 32 + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender { + uint data[]; +} +cluster_render; + +layout(set = 0, binding = 2, std430) buffer restrict ClusterStore { + uint data[]; +} +cluster_store; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(pos, params.cluster_screen_size))) { + return; + } + + //counter for each type of render_element + + //base offset for this cluster + uint base_offset = (pos.x + params.cluster_screen_size.x * pos.y); + uint src_offset = base_offset * params.cluster_render_data_size; + + uint render_element_offset = 0; + + //check all render_elements and see which one was written to + while (render_element_offset < params.render_element_count_div_32) { + uint bits = cluster_render.data[src_offset + render_element_offset]; + while (bits != 0) { + //if bits exist, check the render_element + uint index_bit = findLSB(bits); + uint index = render_element_offset * 32 + index_bit; + uint type = render_elements.data[index].type; + + uint z_range_offset = src_offset + params.max_render_element_count_div_32 + index; + uint z_range = cluster_render.data[z_range_offset]; + + //if object was written, z was written, but check just in case + if (z_range != 0) { //should always be > 0 + + uint from_z = findLSB(z_range); + uint to_z = findMSB(z_range) + 1; + + if (render_elements.data[index].touches_near) { + from_z = 0; + } + + if (render_elements.data[index].touches_far) { + to_z = 32; + } + + // find cluster offset in the buffer used for indexing in the renderer + uint dst_offset = (base_offset + type * (params.cluster_screen_size.x * params.cluster_screen_size.y)) * (params.max_cluster_element_count_div_32 + 32); + + uint orig_index = render_elements.data[index].original_index; + //store this index in the Z slices by setting the relevant bit + for (uint i = from_z; i < to_z; i++) { + uint slice_ofs = dst_offset + params.max_cluster_element_count_div_32 + i; + + uint minmax = cluster_store.data[slice_ofs]; + + if (minmax == 0) { + minmax = 0xFFFF; //min 0, max 0xFFFF + } + + uint elem_min = min(orig_index, minmax & 0xFFFF); + uint elem_max = max(orig_index + 1, minmax >> 16); //always store plus one, so zero means range is empty when not written to + + minmax = elem_min | (elem_max << 16); + cluster_store.data[slice_ofs] = minmax; + } + + uint store_word = orig_index >> 5; + uint store_bit = orig_index & 0x1F; + + //store the actual render_element index at the end, so the rendering code can reference it + cluster_store.data[dst_offset + store_word] |= 1 << store_bit; + } + + bits &= ~(1 << index_bit); //clear the bit to continue iterating + } + + render_element_offset++; + } +} diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl index 8011dadc72..c2965f9874 100644 --- a/servers/rendering/renderer_rd/shaders/gi.glsl +++ b/servers/rendering/renderer_rd/shaders/gi.glsl @@ -99,7 +99,7 @@ layout(push_constant, binding = 0, std430) uniform Params { uint max_giprobes; bool high_quality_vct; - bool use_sdfgi; + uint pad2; bool orthogonal; vec3 ao_color; @@ -331,7 +331,7 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o } ambient_light.rgb = diffuse; -#if 1 + if (roughness < 0.2) { vec3 pos_to_uvw = 1.0 / sdfgi.grid_size; vec4 light_accum = vec4(0.0); @@ -363,7 +363,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o //ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; } - float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade while (length(ray_pos) < max_distance) { for (uint i = 0; i < sdfgi.max_cascades; i++) { @@ -434,8 +433,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o } } -#endif - reflection_light.rgb = specular; ambient_light.rgb *= sdfgi.energy; @@ -621,11 +618,12 @@ void main() { vec3 reflection = normalize(reflect(normalize(vertex), normal)); - if (params.use_sdfgi) { - sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); - } +#ifdef USE_SDFGI + sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); +#endif - if (params.max_giprobes > 0) { +#ifdef USE_GIPROBES + { uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg; roughness *= roughness; //find arbitrary tangent and bitangent, then build a matrix @@ -656,6 +654,7 @@ void main() { ambient_light = amb_accum; } } +#endif } imageStore(ambient_buffer, pos, ambient_light); diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index 7fa5f7b0fe..c3e7e2acbf 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -541,7 +541,7 @@ vec3 F0(float metallic, float specular, vec3 albedo) { return mix(vec3(dielectric), albedo, vec3(metallic)); } -void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, +void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount, #ifdef LIGHT_BACKLIGHT_USED vec3 backlight, #endif @@ -710,7 +710,7 @@ LIGHT_SHADER_CODE blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); float intensity = blinn; - specular_light += light_color * intensity * attenuation; + specular_light += light_color * intensity * attenuation * specular_amount; #elif defined(SPECULAR_PHONG) @@ -721,7 +721,7 @@ LIGHT_SHADER_CODE phong *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); float intensity = (phong) / max(4.0 * cNdotV * cNdotL, 0.75); - specular_light += light_color * intensity * attenuation; + specular_light += light_color * intensity * attenuation * specular_amount; #elif defined(SPECULAR_TOON) @@ -730,7 +730,7 @@ LIGHT_SHADER_CODE float mid = 1.0 - roughness; mid *= mid; float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid; - diffuse_light += light_color * intensity * attenuation; // write to diffuse_light, as in toon shading you generally want no reflection + diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection #elif defined(SPECULAR_DISABLED) // none.. @@ -760,7 +760,7 @@ LIGHT_SHADER_CODE vec3 specular_brdf_NL = cNdotL * D * F * G; - specular_light += specular_brdf_NL * light_color * attenuation; + specular_light += specular_brdf_NL * light_color * attenuation * specular_amount; #endif #if defined(LIGHT_CLEARCOAT_USED) @@ -774,7 +774,7 @@ LIGHT_SHADER_CODE float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL; - specular_light += clearcoat_specular_brdf_NL * light_color * attenuation; + specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount; #endif } @@ -903,28 +903,28 @@ float get_omni_attenuation(float distance, float inv_range, float decay) { float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { #ifndef USE_NO_SHADOWS - if (lights.data[idx].shadow_enabled) { + if (omni_lights.data[idx].shadow_enabled) { // there is a shadowmap - vec3 light_rel_vec = lights.data[idx].position - vertex; + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); vec4 v = vec4(vertex, 1.0); - vec4 splane = (lights.data[idx].shadow_matrix * v); + vec4 splane = (omni_lights.data[idx].shadow_matrix * v); float shadow_len = length(splane.xyz); //need to remember shadow len from here { - vec3 nofs = normal_interp * lights.data[idx].shadow_normal_bias / lights.data[idx].inv_radius; + vec3 nofs = normal_interp * omni_lights.data[idx].shadow_normal_bias / omni_lights.data[idx].inv_radius; nofs *= (1.0 - max(0.0, dot(normalize(light_rel_vec), normalize(normal_interp)))); v.xyz += nofs; - splane = (lights.data[idx].shadow_matrix * v); + splane = (omni_lights.data[idx].shadow_matrix * v); } float shadow; #ifdef USE_SOFT_SHADOWS - if (lights.data[idx].soft_shadow_size > 0.0) { + if (omni_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker @@ -944,10 +944,10 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); vec3 tangent = normalize(cross(v0, normal)); vec3 bitangent = normalize(cross(tangent, normal)); - float z_norm = shadow_len * lights.data[idx].inv_radius; + float z_norm = shadow_len * omni_lights.data[idx].inv_radius; - tangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale; - bitangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale; + tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; + bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; @@ -955,7 +955,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); - vec4 uv_rect = lights.data[idx].atlas_rect; + vec4 uv_rect = omni_lights.data[idx].atlas_rect; if (pos.z >= 0.0) { pos.z += 1.0; @@ -983,7 +983,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { tangent *= penumbra; bitangent *= penumbra; - z_norm -= lights.data[idx].inv_radius * lights.data[idx].shadow_bias; + z_norm -= omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias; shadow = 0.0; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { @@ -991,7 +991,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); - vec4 uv_rect = lights.data[idx].atlas_rect; + vec4 uv_rect = omni_lights.data[idx].atlas_rect; if (pos.z >= 0.0) { pos.z += 1.0; @@ -1016,7 +1016,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { } else { #endif splane.xyz = normalize(splane.xyz); - vec4 clamp_rect = lights.data[idx].atlas_rect; + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; if (splane.z >= 0.0) { splane.z += 1.0; @@ -1030,10 +1030,10 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { splane.xy /= splane.z; splane.xy = splane.xy * 0.5 + 0.5; - splane.z = (shadow_len - lights.data[idx].shadow_bias) * lights.data[idx].inv_radius; + splane.z = (shadow_len - omni_lights.data[idx].shadow_bias) * omni_lights.data[idx].inv_radius; splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; splane.w = 1.0; //needed? i think it should be 1 already - shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane); + shadow = sample_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane); #ifdef USE_SOFT_SHADOWS } #endif @@ -1068,17 +1068,17 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v inout float alpha, #endif inout vec3 diffuse_light, inout vec3 specular_light) { - vec3 light_rel_vec = lights.data[idx].position - vertex; + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); - float omni_attenuation = get_omni_attenuation(light_length, lights.data[idx].inv_radius, lights.data[idx].attenuation); + float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); float light_attenuation = omni_attenuation; - vec3 color = lights.data[idx].color; + vec3 color = omni_lights.data[idx].color; #ifdef USE_SOFT_SHADOWS float size_A = 0.0; - if (lights.data[idx].size > 0.0) { - float t = lights.data[idx].size / max(0.001, light_length); + if (omni_lights.data[idx].size > 0.0) { + float t = omni_lights.data[idx].size / max(0.001, light_length); size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); } #endif @@ -1087,10 +1087,10 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float transmittance_z = transmittance_depth; //no transmittance by default transmittance_color.a *= light_attenuation; { - vec4 clamp_rect = lights.data[idx].atlas_rect; + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; //redo shadowmapping, but shrink the model a bit to avoid arctifacts - vec4 splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0)); + vec4 splane = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * omni_lights.data[idx].transmittance_bias, 1.0)); shadow_len = length(splane.xyz); splane = normalize(splane.xyz); @@ -1104,22 +1104,22 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v splane.xy /= splane.z; splane.xy = splane.xy * 0.5 + 0.5; - splane.z = shadow_len * lights.data[idx].inv_radius; + splane.z = shadow_len * omni_lights.data[idx].inv_radius; splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; splane.w = 1.0; //needed? i think it should be 1 already float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; - transmittance_z = (splane.z - shadow_z) / lights.data[idx].inv_radius; + transmittance_z = (splane.z - shadow_z) / omni_lights.data[idx].inv_radius; } #endif #if 0 - if (lights.data[idx].projector_rect != vec4(0.0)) { - vec3 local_v = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; + if (omni_lights.data[idx].projector_rect != vec4(0.0)) { + vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; local_v = normalize(local_v); - vec4 atlas_rect = lights.data[idx].projector_rect; + vec4 atlas_rect = omni_lights.data[idx].projector_rect; if (local_v.z >= 0.0) { local_v.z += 1.0; @@ -1136,7 +1136,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec2 proj_uv_ddx; vec2 proj_uv_ddy; { - vec3 local_v_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz; + vec3 local_v_ddx = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz; local_v_ddx = normalize(local_v_ddx); if (local_v_ddx.z >= 0.0) { @@ -1150,7 +1150,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv; - vec3 local_v_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz; + vec3 local_v_ddy = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz; local_v_ddy = normalize(local_v_ddy); if (local_v_ddy.z >= 0.0) { @@ -1172,7 +1172,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1204,37 +1204,37 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { #ifndef USE_NO_SHADOWS - if (lights.data[idx].shadow_enabled) { - vec3 light_rel_vec = lights.data[idx].position - vertex; + if (spot_lights.data[idx].shadow_enabled) { + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); - vec3 spot_dir = lights.data[idx].direction; + vec3 spot_dir = spot_lights.data[idx].direction; //there is a shadowmap vec4 v = vec4(vertex, 1.0); - v.xyz -= spot_dir * lights.data[idx].shadow_bias; + v.xyz -= spot_dir * spot_lights.data[idx].shadow_bias; - float z_norm = dot(spot_dir, -light_rel_vec) * lights.data[idx].inv_radius; + float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius; float depth_bias_scale = 1.0 / (max(0.0001, z_norm)); //the closer to the light origin, the more you have to offset to reach 1px in the map - vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * lights.data[idx].shadow_normal_bias * depth_bias_scale; + vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * spot_lights.data[idx].shadow_normal_bias * depth_bias_scale; normal_bias -= spot_dir * dot(spot_dir, normal_bias); //only XY, no Z v.xyz += normal_bias; //adjust with bias - z_norm = dot(spot_dir, v.xyz - lights.data[idx].position) * lights.data[idx].inv_radius; + z_norm = dot(spot_dir, v.xyz - spot_lights.data[idx].position) * spot_lights.data[idx].inv_radius; float shadow; - vec4 splane = (lights.data[idx].shadow_matrix * v); + vec4 splane = (spot_lights.data[idx].shadow_matrix * v); splane /= splane.w; #ifdef USE_SOFT_SHADOWS - if (lights.data[idx].soft_shadow_size > 0.0) { + if (spot_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker - vec2 shadow_uv = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy; + vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; float blocker_count = 0.0; float blocker_average = 0.0; @@ -1247,11 +1247,11 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); } - float uv_size = lights.data[idx].soft_shadow_size * z_norm * lights.data[idx].soft_shadow_scale; - vec2 clamp_max = lights.data[idx].atlas_rect.xy + lights.data[idx].atlas_rect.zw; + float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale; + vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; - suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max); + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; if (d < z_norm) { blocker_average += d; @@ -1268,7 +1268,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { shadow = 0.0; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; - suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max); + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, z_norm, 1.0)); } @@ -1282,9 +1282,9 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { } else { #endif //hard shadow - vec4 shadow_uv = vec4(splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy, z_norm, 1.0); + vec4 shadow_uv = vec4(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, z_norm, 1.0); - shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); + shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); #ifdef USE_SOFT_SHADOWS } #endif @@ -1321,28 +1321,28 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v #endif inout vec3 diffuse_light, inout vec3 specular_light) { - vec3 light_rel_vec = lights.data[idx].position - vertex; + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); - float spot_attenuation = get_omni_attenuation(light_length, lights.data[idx].inv_radius, lights.data[idx].attenuation); - vec3 spot_dir = lights.data[idx].direction; - float scos = max(dot(-normalize(light_rel_vec), spot_dir), lights.data[idx].cone_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - lights.data[idx].cone_angle)); - spot_attenuation *= 1.0 - pow(spot_rim, lights.data[idx].cone_attenuation); + float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); + vec3 spot_dir = spot_lights.data[idx].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[idx].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[idx].cone_angle)); + spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation); float light_attenuation = spot_attenuation; - vec3 color = lights.data[idx].color; - float specular_amount = lights.data[idx].specular_amount; + vec3 color = spot_lights.data[idx].color; + float specular_amount = spot_lights.data[idx].specular_amount; #ifdef USE_SOFT_SHADOWS float size_A = 0.0; - if (lights.data[idx].size > 0.0) { - float t = lights.data[idx].size / max(0.001, light_length); + if (spot_lights.data[idx].size > 0.0) { + float t = spot_lights.data[idx].size / max(0.001, light_length); size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); } #endif /* - if (lights.data[idx].atlas_rect!=vec4(0.0)) { + if (spot_lights.data[idx].atlas_rect!=vec4(0.0)) { //use projector texture } */ @@ -1351,13 +1351,13 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float transmittance_z = transmittance_depth; transmittance_color.a *= light_attenuation; { - splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0)); + splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * spot_lights.data[idx].transmittance_bias, 1.0)); splane /= splane.w; - splane.xy = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy; + splane.xy = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; //reconstruct depth - shadow_z /= lights.data[idx].inv_radius; + shadow_z /= spot_lights.data[idx].inv_radius; //distance to light plane float z = dot(spot_dir, -light_rel_vec); transmittance_z = z - shadow_z; @@ -1366,7 +1366,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1785,7 +1785,43 @@ vec4 fog_process(vec3 vertex) { return vec4(fog_color, fog_amount); } +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + +float blur_shadow(float shadow) { + return shadow; +#if 0 + //disabling for now, will investigate later + float interp_shadow = shadow; + if (gl_HelperInvocation) { + interp_shadow = -4.0; // technically anything below -4 will do but just to make sure + } + + uvec2 fc2 = uvec2(gl_FragCoord.xy); + interp_shadow -= dFdx(interp_shadow) * (float(fc2.x & 1) - 0.5); + interp_shadow -= dFdy(interp_shadow) * (float(fc2.y & 1) - 0.5); + + if (interp_shadow >= 0.0) { + shadow = interp_shadow; + } + return shadow; #endif +} + +#endif //!MODE_RENDER DEPTH void main() { #ifdef MODE_DUAL_PARABOLOID @@ -2003,67 +2039,98 @@ FRAGMENT_SHADER_CODE #ifndef MODE_RENDER_DEPTH - uvec4 cluster_cell = texture(usampler3D(cluster_texture, material_samplers[SAMPLER_NEAREST_CLAMP]), vec3(screen_uv, (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near))); + uvec2 cluster_pos = uvec2(gl_FragCoord.xy) >> scene_data.cluster_shift; + uint cluster_offset = (scene_data.cluster_width * cluster_pos.y + cluster_pos.x) * (scene_data.max_cluster_element_count_div_32 + 32); + + uint cluster_z = uint(clamp((-vertex.z / scene_data.z_far) * 32.0, 0.0, 31.0)); + //used for interpolating anything cluster related vec3 vertex_ddx = dFdx(vertex); vec3 vertex_ddy = dFdy(vertex); { // process decals - uint decal_count = cluster_cell.w >> CLUSTER_COUNTER_SHIFT; - uint decal_pointer = cluster_cell.w & CLUSTER_POINTER_MASK; + uint cluster_decal_offset = cluster_offset + scene_data.cluster_type_size * 2; - //do outside for performance and avoiding arctifacts + uint item_min; + uint item_max; + uint item_from; + uint item_to; - for (uint i = 0; i < decal_count; i++) { - uint decal_index = cluster_data.indices[decal_pointer + i]; - if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { - continue; //not masked - } + cluster_get_item_range(cluster_decal_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; - if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { - continue; //out of decal - } +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - //we need ddx/ddy for mipmaps, so simulate them - vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz; - vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz; + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_decal_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint decal_index = 32 * i + bit; - if (decals.data[decal_index].normal_fade > 0.0) { - fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5); - } + if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { + continue; //not masked + } + + vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; + if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { + continue; //out of decal + } + + //we need ddx/ddy for mipmaps, so simulate them + vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz; + vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz; - if (decals.data[decal_index].albedo_rect != vec4(0.0)) { - //has albedo - vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); - decal_albedo *= decals.data[decal_index].modulate; - decal_albedo.a *= fade; - albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); - - if (decals.data[decal_index].normal_rect != vec4(0.0)) { - vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; - decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software - decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy))); - //convert to view space, use xzy because y is up - decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; - - normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); + + if (decals.data[decal_index].normal_fade > 0.0) { + fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5); } - if (decals.data[decal_index].orm_rect != vec4(0.0)) { - vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; - ao = mix(ao, decal_orm.r, decal_albedo.a); - roughness = mix(roughness, decal_orm.g, decal_albedo.a); - metallic = mix(metallic, decal_orm.b, decal_albedo.a); + if (decals.data[decal_index].albedo_rect != vec4(0.0)) { + //has albedo + vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); + decal_albedo *= decals.data[decal_index].modulate; + decal_albedo.a *= fade; + albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); + + if (decals.data[decal_index].normal_rect != vec4(0.0)) { + vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; + decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software + decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy))); + //convert to view space, use xzy because y is up + decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; + + normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + } + + if (decals.data[decal_index].orm_rect != vec4(0.0)) { + vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; + ao = mix(ao, decal_orm.r, decal_albedo.a); + roughness = mix(roughness, decal_orm.g, decal_albedo.a); + metallic = mix(metallic, decal_orm.b, decal_albedo.a); + } } - } - if (decals.data[decal_index].emission_rect != vec4(0.0)) { - //emission is additive, so its independent from albedo - emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + if (decals.data[decal_index].emission_rect != vec4(0.0)) { + //emission is additive, so its independent from albedo + emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + } } } } @@ -2348,12 +2415,45 @@ FRAGMENT_SHADER_CODE vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); - uint reflection_probe_count = cluster_cell.z >> CLUSTER_COUNTER_SHIFT; - uint reflection_probe_pointer = cluster_cell.z & CLUSTER_POINTER_MASK; + uint cluster_reflection_offset = cluster_offset + scene_data.cluster_type_size * 3; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_reflection_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_reflection_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - for (uint i = 0; i < reflection_probe_count; i++) { - uint ref_index = cluster_data.indices[reflection_probe_pointer + i]; - reflection_process(ref_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint reflection_index = 32 * i + bit; + + if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) { + continue; //not masked + } + + reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + } } if (reflection_accum.a > 0.0) { @@ -2800,7 +2900,9 @@ FRAGMENT_SHADER_CODE shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0; } - light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, + blur_shadow(shadow); + + light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -2833,154 +2935,146 @@ FRAGMENT_SHADER_CODE { //omni lights - uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT; - uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK; + uint cluster_omni_offset = cluster_offset; - // Do shadow and lighting in two passes to reduce register pressure - uint shadow0 = 0; - uint shadow1 = 0; - uint shadow2 = 0; + uint item_min; + uint item_max; + uint item_from; + uint item_to; - for (uint i = 0; i < 18; i++) { - if (i >= omni_light_count) { - break; - } - uint light_index = cluster_data.indices[omni_light_pointer + i]; + cluster_get_item_range(cluster_omni_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) { - continue; //not masked - } +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - float s = light_process_omni_shadow(light_index, vertex, view); - if (i < 6) { - shadow0 |= uint(clamp(s * 31.0, 0.0, 31.0)) << (i * 5); - } else if (i < 12) { - shadow1 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 6) * 5); - } else { - shadow2 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 12) * 5); - } - } + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - for (uint i = 0; i < 18; i++) { - if (i == omni_light_count) { - break; - } - uint light_index = cluster_data.indices[omni_light_pointer + i]; + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; - if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) { - continue; //not masked - } + if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + continue; //not masked + } - float shadow; - if (i < 6) { - shadow = float(shadow0 >> (i * 5) & 0x1F) / 31.0; - } else if (i < 12) { - shadow = float(shadow1 >> ((i - 6) * 5) & 0x1F) / 31.0; - } else { - shadow = float(shadow1 >> ((i - 12) * 5) & 0x1F) / 31.0; - } + float shadow = light_process_omni_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); - light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, #endif #ifdef LIGHT_RIM_USED - rim, - rim_tint, - albedo, + rim, + rim_tint, + albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, + tangent, binormal, anisotropy, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, specular_light); + diffuse_light, specular_light); + } } } { //spot lights - uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT; - uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK; - // Do shadow and lighting in two passes to reduce register pressure - uint shadow0 = 0; - uint shadow1 = 0; - uint shadow2 = 0; + uint cluster_spot_offset = cluster_offset + scene_data.cluster_type_size; - for (uint i = 0; i < 18; i++) { - if (i >= spot_light_count) { - break; - } - uint light_index = cluster_data.indices[spot_light_pointer + i]; + uint item_min; + uint item_max; + uint item_from; + uint item_to; - if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) { - continue; //not masked - } + cluster_get_item_range(cluster_spot_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - float s = light_process_spot_shadow(light_index, vertex, view); - if (i < 6) { - shadow0 |= uint(clamp(s * 31.0, 0.0, 31.0)) << (i * 5); - } else if (i < 12) { - shadow1 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 6) * 5); - } else { - shadow2 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 12) * 5); - } - } +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - for (uint i = 0; i < 18; i++) { - if (i == spot_light_count) { - break; - } - uint light_index = cluster_data.indices[spot_light_pointer + i]; + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) { - continue; //not masked - } + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif - float shadow; - if (i < 6) { - shadow = float(shadow0 >> (i * 5) & 0x1F) / 31.0; - } else if (i < 12) { - shadow = float(shadow1 >> ((i - 6) * 5) & 0x1F) / 31.0; - } else { - shadow = float(shadow1 >> ((i - 12) * 5) & 0x1F) / 31.0; - } + uint light_index = 32 * i + bit; - light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, + if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) { + continue; //not masked + } + + float shadow = light_process_spot_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); + + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, #endif #ifdef LIGHT_RIM_USED - rim, - rim_tint, - albedo, + rim, + rim_tint, + albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, + tangent, binormal, anisotropy, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, specular_light); + diffuse_light, specular_light); + } } } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index 87ce74ba88..a37e32e1fc 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -3,6 +3,15 @@ #define MAX_GI_PROBES 8 +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +#define USE_SUBGROUPS + +#endif + #include "cluster_data_inc.glsl" #if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) @@ -52,6 +61,11 @@ layout(set = 0, binding = 3, std140) uniform SceneData { vec2 viewport_size; vec2 screen_pixel_size; + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint max_cluster_element_count_div_32; + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted vec4 directional_penumbra_shadow_kernel[32]; vec4 directional_soft_shadow_kernel[32]; @@ -139,17 +153,22 @@ scene_data; #define INSTANCE_FLAGS_SKELETON (1 << 19) #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) -layout(set = 0, binding = 5, std430) restrict readonly buffer Lights { +layout(set = 0, binding = 5, std430) restrict readonly buffer OmniLights { + LightData data[]; +} +omni_lights; + +layout(set = 0, binding = 6, std430) restrict readonly buffer SpotLights { LightData data[]; } -lights; +spot_lights; -layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { +layout(set = 0, binding = 7) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -layout(set = 0, binding = 7, std140) uniform DirectionalLights { +layout(set = 0, binding = 8, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; @@ -183,16 +202,9 @@ layout(set = 0, binding = 14, std430) restrict readonly buffer Decals { } decals; -layout(set = 0, binding = 15) uniform utexture3D cluster_texture; - -layout(set = 0, binding = 16, std430) restrict readonly buffer ClusterData { - uint indices[]; -} -cluster_data; +layout(set = 0, binding = 15) uniform texture2D directional_shadow_atlas; -layout(set = 0, binding = 17) uniform texture2D directional_shadow_atlas; - -layout(set = 0, binding = 18, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 16, std430) restrict readonly buffer GlobalVariableData { vec4 data[]; } global_variables; @@ -206,7 +218,7 @@ struct SDFGIProbeCascadeData { float to_cell; // 1/bounds * grid_size }; -layout(set = 0, binding = 19, std140) uniform SDFGI { +layout(set = 0, binding = 17, std140) uniform SDFGI { vec3 grid_size; uint max_cascades; @@ -262,14 +274,19 @@ layout(set = 1, binding = 3) uniform texture2DArray lightmap_textures[MAX_LIGHTM layout(set = 1, binding = 4) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; #endif +layout(set = 1, binding = 5, std430) buffer restrict readonly ClusterBuffer { + uint data[]; +} +cluster_buffer; + /* Set 3, Render Buffers */ #ifdef MODE_RENDER_SDF -layout(r16ui, set = 1, binding = 5) uniform restrict writeonly uimage3D albedo_volume_grid; -layout(r32ui, set = 1, binding = 6) uniform restrict writeonly uimage3D emission_grid; -layout(r32ui, set = 1, binding = 7) uniform restrict writeonly uimage3D emission_aniso_grid; -layout(r32ui, set = 1, binding = 8) uniform restrict uimage3D geom_facing_grid; +layout(r16ui, set = 1, binding = 6) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 7) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 9) uniform restrict uimage3D geom_facing_grid; //still need to be present for shaders that use it, so remap them to something #define depth_buffer shadow_atlas @@ -278,17 +295,17 @@ layout(r32ui, set = 1, binding = 8) uniform restrict uimage3D geom_facing_grid; #else -layout(set = 1, binding = 5) uniform texture2D depth_buffer; -layout(set = 1, binding = 6) uniform texture2D color_buffer; +layout(set = 1, binding = 6) uniform texture2D depth_buffer; +layout(set = 1, binding = 7) uniform texture2D color_buffer; #ifndef LOW_END_MODE -layout(set = 1, binding = 7) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 8) uniform texture2D ao_buffer; -layout(set = 1, binding = 9) uniform texture2D ambient_buffer; -layout(set = 1, binding = 10) uniform texture2D reflection_buffer; -layout(set = 1, binding = 11) uniform texture2DArray sdfgi_lightprobe_texture; -layout(set = 1, binding = 12) uniform texture3D sdfgi_occlusion_cascades; +layout(set = 1, binding = 8) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 9) uniform texture2D ao_buffer; +layout(set = 1, binding = 10) uniform texture2D ambient_buffer; +layout(set = 1, binding = 11) uniform texture2D reflection_buffer; +layout(set = 1, binding = 12) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 13) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { mat4 xform; @@ -306,12 +323,12 @@ struct GIProbeData { uint mipmaps; }; -layout(set = 1, binding = 13, std140) uniform GIProbes { +layout(set = 1, binding = 14, std140) uniform GIProbes { GIProbeData data[MAX_GI_PROBES]; } gi_probes; -layout(set = 1, binding = 14) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 15) uniform texture3D volumetric_fog_texture; #endif // LOW_END_MODE diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl index 30dbf5871f..ed0a8a4b86 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl @@ -143,10 +143,78 @@ void main() { uint voxel_albedo = process_voxels.data[voxel_index].albedo; vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F); - vec3 light_accum[6]; - + vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); uint valid_aniso = (voxel_albedo >> 15) & 0x3F; + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + // Add indirect light first, in order to save computation resources +#ifdef MODE_PROCESS_DYNAMIC + if (params.multibounce) { + vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; + ivec3 probe_base_pos = ivec3(pos); + + float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); + tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); + + tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); + + vec3 base_tex_posf = vec3(tex_pos); + vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); + vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + // Compute lightprobe texture position + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + + for (uint k = 0; k < 6; k++) { + if (bool(valid_aniso & (1 << k))) { + vec3 n = aniso_dir[k]; + float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); + + vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); + tex_posf.xy *= tex_pixel_size; + + vec3 pos_uvw = tex_posf; + pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; + pos_uvw.x += float(offset.z) * probe_uv_offset.z; + vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + light_accum[k] += indirect_light * weight; + weight_accum[k] += weight; + } + } + } + + for (uint k = 0; k < 6; k++) { + if (weight_accum[k] > 0.0) { + light_accum[k] /= weight_accum[k]; + light_accum[k] *= albedo; + } + } + } + +#endif + { uint rgbe = process_voxels.data[voxel_index].light; @@ -162,18 +230,10 @@ void main() { uint aniso = process_voxels.data[voxel_index].light_aniso; for (uint i = 0; i < 6; i++) { float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F); - light_accum[i] = l * strength; + light_accum[i] += l * strength; } } - const vec3 aniso_dir[6] = vec3[]( - vec3(1, 0, 0), - vec3(0, 1, 0), - vec3(0, 0, 1), - vec3(-1, 0, 0), - vec3(0, -1, 0), - vec3(0, 0, -1)); - // Raytrace light vec3 pos_to_uvw = 1.0 / params.grid_size; @@ -292,65 +352,6 @@ void main() { } } - // Add indirect light - - if (params.multibounce) { - vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; - ivec3 probe_base_pos = ivec3(pos); - - vec4 probe_accum[6] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); - float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); - - ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); - tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); - - tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); - - vec3 base_tex_posf = vec3(tex_pos); - vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); - vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; - - for (uint j = 0; j < 8; j++) { - ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); - ivec3 probe_posi = probe_base_pos; - probe_posi += offset; - - // Compute weight - - vec3 probe_pos = vec3(probe_posi); - vec3 probe_to_pos = pos - probe_pos; - vec3 probe_dir = normalize(-probe_to_pos); - - // Compute lightprobe texture position - - vec3 trilinear = vec3(1.0) - abs(probe_to_pos); - - for (uint k = 0; k < 6; k++) { - if (bool(valid_aniso & (1 << k))) { - vec3 n = aniso_dir[k]; - float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); - - vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); - tex_posf.xy *= tex_pixel_size; - - vec3 pos_uvw = tex_posf; - pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; - pos_uvw.x += float(offset.z) * probe_uv_offset.z; - vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0); - - probe_accum[k] += indirect_light * weight; - weight_accum[k] += weight; - } - } - } - - for (uint k = 0; k < 6; k++) { - if (weight_accum[k] > 0.0) { - light_accum[k] += probe_accum[k].rgb * albedo / weight_accum[k]; - } - } - } - // Store the light in the light texture float lumas[6]; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl index d516ab22c3..67630a3aa1 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl @@ -136,12 +136,24 @@ uint rgbe_encode(vec3 color) { return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); } +struct SH { +#if (SH_SIZE == 16) + float c[48]; +#else + float c[28]; +#endif +}; + +shared SH sh_accum[64]; //8x8 + void main() { ivec2 pos = ivec2(gl_GlobalInvocationID.xy); if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing return; } + uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8; + #ifdef MODE_PROCESS float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; @@ -154,27 +166,9 @@ void main() { vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; vec3 pos_to_uvw = 1.0 / params.grid_size; - vec4 probe_sh_accum[SH_SIZE] = vec4[]( - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0) -#if (SH_SIZE == 16) - , - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0) -#endif - ); + for (uint i = 0; i < SH_SIZE * 3; i++) { + sh_accum[probe_index].c[i] = 0.0; + } // quickly ensure each probe has a different "offset" for the vogel function, based on integer world position uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell)); @@ -278,33 +272,33 @@ void main() { } vec3 ray_dir2 = ray_dir * ray_dir; - float c[SH_SIZE] = float[]( - - 0.282095, //l0 - 0.488603 * ray_dir.y, //l1n1 - 0.488603 * ray_dir.z, //l1n0 - 0.488603 * ray_dir.x, //l1p1 - 1.092548 * ray_dir.x * ray_dir.y, //l2n2 - 1.092548 * ray_dir.y * ray_dir.z, //l2n1 - 0.315392 * (3.0 * ray_dir2.z - 1.0), //l20 - 1.092548 * ray_dir.x * ray_dir.z, //l2p1 - 0.546274 * (ray_dir2.x - ray_dir2.y) //l2p2 + +#define SH_ACCUM(m_idx, m_value) \ + { \ + vec3 l = light.rgb * (m_value); \ + sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \ + sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \ + sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \ + } + SH_ACCUM(0, 0.282095); //l0 + SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1 + SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0 + SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1 + SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2 + SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1 + SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20 + SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1 + SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2 #if (SH_SIZE == 16) - , - 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y), - 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z, - 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z), - 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z), - 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z), - 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z, - 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y) + SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y)); + SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z); + SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z)); + SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z); + SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)); #endif - ); - - for (uint j = 0; j < SH_SIZE; j++) { - probe_sh_accum[j] += light * c[j]; - } } for (uint i = 0; i < SH_SIZE; i++) { @@ -312,7 +306,7 @@ void main() { ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index)); ivec2 average_pos = prev_pos.xy; - vec4 value = probe_sh_accum[i] * 4.0 / float(params.ray_count); + vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count); ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average @@ -344,37 +338,11 @@ void main() { ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1); ivec2 local_pos = pos % OCT_SIZE; - //fill the spherical harmonic - vec4 sh[SH_SIZE]; - - for (uint i = 0; i < SH_SIZE; i++) { - // store in history texture - ivec2 average_pos = sh_pos + ivec2(0, i); - ivec4 average = imageLoad(lightprobe_average_texture, average_pos); - - sh[i] = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); - } - //compute the octahedral normal for this texel vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE)); - /* + // read the spherical harmonic - const float c1 = 0.429043; - const float c2 = 0.511664; - const float c3 = 0.743125; - const float c4 = 0.886227; - const float c5 = 0.247708; - vec4 light = (c1 * sh[8] * (normal.x * normal.x - normal.y * normal.y) + - c3 * sh[6] * normal.z * normal.z + - c4 * sh[0] - - c5 * sh[6] + - 2.0 * c1 * sh[4] * normal.x * normal.y + - 2.0 * c1 * sh[7] * normal.x * normal.z + - 2.0 * c1 * sh[5] * normal.y * normal.z + - 2.0 * c2 * sh[3] * normal.x + - 2.0 * c2 * sh[1] * normal.y + - 2.0 * c2 * sh[2] * normal.z); -*/ + vec3 normal2 = normal * normal; float c[SH_SIZE] = float[]( @@ -426,7 +394,14 @@ void main() { vec3 radiance = vec3(0.0); for (uint i = 0; i < SH_SIZE; i++) { - vec3 m = sh[i].rgb * c[i] * 4.0; + // store in history texture + ivec2 average_pos = sh_pos + ivec2(0, i); + ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + + vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + + vec3 m = sh.rgb * c[i] * 4.0; + irradiance += m * l_mult[i]; radiance += m; } diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl index 6215e721ce..aa32809a06 100644 --- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl +++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl @@ -4,6 +4,15 @@ VERSION_DEFINES +/* Do not use subgroups here, seems there is not much advantage and causes glitches +#extension GL_KHR_shader_subgroup_ballot: enable +#extension GL_KHR_shader_subgroup_arithmetic: enable + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) +#define USE_SUBGROUPS +#endif +*/ + #if defined(MODE_FOG) || defined(MODE_FILTER) layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; @@ -23,22 +32,25 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; layout(set = 0, binding = 1) uniform texture2D shadow_atlas; layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas; -layout(set = 0, binding = 3, std430) restrict readonly buffer Lights { +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { LightData data[]; } -lights; +omni_lights; -layout(set = 0, binding = 4, std140) uniform DirectionalLights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { + LightData data[]; +} +spot_lights; + +layout(set = 0, binding = 5, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; -layout(set = 0, binding = 5) uniform utexture3D cluster_texture; - -layout(set = 0, binding = 6, std430) restrict readonly buffer ClusterData { - uint indices[]; +layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer { + uint data[]; } -cluster_data; +cluster_buffer; layout(set = 0, binding = 7) uniform sampler linear_sampler; @@ -132,7 +144,7 @@ layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture; #endif //SDFGI -layout(push_constant, binding = 0, std430) uniform Params { +layout(set = 0, binding = 14, std140) uniform Params { vec2 fog_frustum_size_begin; vec2 fog_frustum_size_end; @@ -150,7 +162,14 @@ layout(push_constant, binding = 0, std430) uniform Params { float detail_spread; float gi_inject; uint max_gi_probes; - uint pad; + uint cluster_type_size; + + vec2 screen_size; + uint cluster_shift; + uint cluster_width; + + uvec3 cluster_pad; + uint max_cluster_element_count_div_32; mat3x4 cam_rotation; } @@ -178,6 +197,22 @@ float get_omni_attenuation(float distance, float inv_range, float decay) { return nd * pow(max(distance, 0.0001), -decay); } +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + void main() { vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); @@ -193,6 +228,12 @@ void main() { //posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0; vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels + + uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); vec3 view_pos; @@ -200,6 +241,8 @@ void main() { view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; view_pos.y = -view_pos.y; + uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0)); + vec3 total_light = params.light_color; float total_density = params.base_density; @@ -266,95 +309,160 @@ void main() { //compute lights from cluster - vec3 cluster_pos; - cluster_pos.xy = fog_unit_pos.xy; - cluster_pos.z = clamp((abs(view_pos.z) - params.z_near) / (params.z_far - params.z_near), 0.0, 1.0); + { //omni lights + + uint cluster_omni_offset = cluster_offset; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - uvec4 cluster_cell = texture(usampler3D(cluster_texture, linear_sampler), cluster_pos); + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT; - uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK; + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; - for (uint i = 0; i < omni_light_count; i++) { - uint light_index = cluster_data.indices[omni_light_pointer + i]; + //if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} - vec3 light_pos = lights.data[i].position; - float d = distance(lights.data[i].position, view_pos); - float shadow_attenuation = 1.0; + vec3 light_pos = omni_lights.data[light_index].position; + float d = distance(omni_lights.data[light_index].position, view_pos); + float shadow_attenuation = 1.0; - if (d * lights.data[i].inv_radius < 1.0) { - float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation); + if (d * omni_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); - vec3 light = lights.data[i].color / M_PI; + vec3 light = omni_lights.data[light_index].color / M_PI; - if (lights.data[i].shadow_enabled) { - //has shadow - vec4 v = vec4(view_pos, 1.0); + if (omni_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); - vec4 splane = (lights.data[i].shadow_matrix * v); - float shadow_len = length(splane.xyz); //need to remember shadow len from here + vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); + float shadow_len = length(splane.xyz); //need to remember shadow len from here - splane.xyz = normalize(splane.xyz); - vec4 clamp_rect = lights.data[i].atlas_rect; + splane.xyz = normalize(splane.xyz); + vec4 clamp_rect = omni_lights.data[light_index].atlas_rect; - if (splane.z >= 0.0) { - splane.z += 1.0; + if (splane.z >= 0.0) { + splane.z += 1.0; - clamp_rect.y += clamp_rect.w; + clamp_rect.y += clamp_rect.w; - } else { - splane.z = 1.0 - splane.z; - } + } else { + splane.z = 1.0 - splane.z; + } - splane.xy /= splane.z; + splane.xy /= splane.z; - splane.xy = splane.xy * 0.5 + 0.5; - splane.z = shadow_len * lights.data[i].inv_radius; - splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - splane.w = 1.0; //needed? i think it should be 1 already + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * omni_lights.data[light_index].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + } + total_light += light * attenuation * shadow_attenuation; + } } - total_light += light * attenuation * shadow_attenuation; } } - uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT; - uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK; + { //spot lights + + uint cluster_spot_offset = cluster_offset + params.cluster_type_size; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - for (uint i = 0; i < spot_light_count; i++) { - uint light_index = cluster_data.indices[spot_light_pointer + i]; + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif - vec3 light_pos = lights.data[i].position; - vec3 light_rel_vec = lights.data[i].position - view_pos; - float d = length(light_rel_vec); - float shadow_attenuation = 1.0; + //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} - if (d * lights.data[i].inv_radius < 1.0) { - float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation); + uint light_index = 32 * i + bit; - vec3 spot_dir = lights.data[i].direction; - float scos = max(dot(-normalize(light_rel_vec), spot_dir), lights.data[i].cone_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - lights.data[i].cone_angle)); - attenuation *= 1.0 - pow(spot_rim, lights.data[i].cone_attenuation); + vec3 light_pos = omni_lights.data[light_index].position; + vec3 light_rel_vec = omni_lights.data[light_index].position - view_pos; + float d = length(light_rel_vec); + float shadow_attenuation = 1.0; - vec3 light = lights.data[i].color / M_PI; + if (d * omni_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); - if (lights.data[i].shadow_enabled) { - //has shadow - vec4 v = vec4(view_pos, 1.0); + vec3 spot_dir = omni_lights.data[light_index].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), omni_lights.data[light_index].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - omni_lights.data[light_index].cone_angle)); + attenuation *= 1.0 - pow(spot_rim, omni_lights.data[light_index].cone_attenuation); - vec4 splane = (lights.data[i].shadow_matrix * v); - splane /= splane.w; + vec3 light = omni_lights.data[light_index].color / M_PI; - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + if (omni_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); - shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); - } + vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); + splane /= splane.w; - total_light += light * attenuation * shadow_attenuation; + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + } + + total_light += light * attenuation * shadow_attenuation; + } + } } } |