diff options
Diffstat (limited to 'servers/rendering/renderer_rd')
38 files changed, 6835 insertions, 4125 deletions
diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp new file mode 100644 index 0000000000..0fdd864d47 --- /dev/null +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -0,0 +1,555 @@ +/*************************************************************************/ +/* cluster_builder_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "cluster_builder_rd.h" +#include "servers/rendering/rendering_device.h" +#include "servers/rendering/rendering_server_globals.h" + +ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() { + RD::VertexFormatID vertex_format; + + { + Vector<RD::VertexAttribute> attributes; + { + RD::VertexAttribute va; + va.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; + va.stride = sizeof(float) * 3; + attributes.push_back(va); + } + vertex_format = RD::get_singleton()->vertex_format_create(attributes); + } + + { + Vector<String> versions; + versions.push_back(""); + cluster_render.cluster_render_shader.initialize(versions); + cluster_render.shader_version = cluster_render.cluster_render_shader.version_create(); + cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0); + cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + RD::PipelineMultisampleState ms; + ms.sample_count = RD::TEXTURE_SAMPLES_4; + cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + } + { + Vector<String> versions; + versions.push_back(""); + cluster_store.cluster_store_shader.initialize(versions); + cluster_store.shader_version = cluster_store.cluster_store_shader.version_create(); + cluster_store.shader = cluster_store.cluster_store_shader.version_get_shader(cluster_store.shader_version, 0); + cluster_store.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_store.shader); + } + { + Vector<String> versions; + versions.push_back(""); + cluster_debug.cluster_debug_shader.initialize(versions); + cluster_debug.shader_version = cluster_debug.cluster_debug_shader.version_create(); + cluster_debug.shader = cluster_debug.cluster_debug_shader.version_get_shader(cluster_debug.shader_version, 0); + cluster_debug.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_debug.shader); + } + + { // SPHERE + static const uint32_t icosphere_vertex_count = 42; + static const float icosphere_vertices[icosphere_vertex_count * 3] = { + 0, 0, -1, 0.7236073, -0.5257253, -0.4472195, -0.276388, -0.8506492, -0.4472199, -0.8944262, 0, -0.4472156, -0.276388, 0.8506492, -0.4472199, 0.7236073, 0.5257253, -0.4472195, 0.276388, -0.8506492, 0.4472199, -0.7236073, -0.5257253, 0.4472195, -0.7236073, 0.5257253, 0.4472195, 0.276388, 0.8506492, 0.4472199, 0.8944262, 0, 0.4472156, 0, 0, 1, -0.1624555, -0.4999952, -0.8506544, 0.4253227, -0.3090114, -0.8506542, 0.2628688, -0.8090116, -0.5257377, 0.8506479, 0, -0.5257359, 0.4253227, 0.3090114, -0.8506542, -0.5257298, 0, -0.8506517, -0.6881894, -0.4999969, -0.5257362, -0.1624555, 0.4999952, -0.8506544, -0.6881894, 0.4999969, -0.5257362, 0.2628688, 0.8090116, -0.5257377, 0.9510579, -0.3090126, 0, 0.9510579, 0.3090126, 0, 0, -1, 0, 0.5877856, -0.8090167, 0, -0.9510579, -0.3090126, 0, -0.5877856, -0.8090167, 0, -0.5877856, 0.8090167, 0, -0.9510579, 0.3090126, 0, 0.5877856, 0.8090167, 0, 0, 1, 0, 0.6881894, -0.4999969, 0.5257362, -0.2628688, -0.8090116, 0.5257377, -0.8506479, 0, 0.5257359, -0.2628688, 0.8090116, 0.5257377, 0.6881894, 0.4999969, 0.5257362, 0.1624555, -0.4999952, 0.8506544, 0.5257298, 0, 0.8506517, -0.4253227, -0.3090114, 0.8506542, -0.4253227, 0.3090114, 0.8506542, 0.1624555, 0.4999952, 0.8506544 + }; + static const uint32_t icosphere_triangle_count = 80; + static const uint32_t icosphere_triangle_indices[icosphere_triangle_count * 3] = { + 0, 13, 12, 1, 13, 15, 0, 12, 17, 0, 17, 19, 0, 19, 16, 1, 15, 22, 2, 14, 24, 3, 18, 26, 4, 20, 28, 5, 21, 30, 1, 22, 25, 2, 24, 27, 3, 26, 29, 4, 28, 31, 5, 30, 23, 6, 32, 37, 7, 33, 39, 8, 34, 40, 9, 35, 41, 10, 36, 38, 38, 41, 11, 38, 36, 41, 36, 9, 41, 41, 40, 11, 41, 35, 40, 35, 8, 40, 40, 39, 11, 40, 34, 39, 34, 7, 39, 39, 37, 11, 39, 33, 37, 33, 6, 37, 37, 38, 11, 37, 32, 38, 32, 10, 38, 23, 36, 10, 23, 30, 36, 30, 9, 36, 31, 35, 9, 31, 28, 35, 28, 8, 35, 29, 34, 8, 29, 26, 34, 26, 7, 34, 27, 33, 7, 27, 24, 33, 24, 6, 33, 25, 32, 6, 25, 22, 32, 22, 10, 32, 30, 31, 9, 30, 21, 31, 21, 4, 31, 28, 29, 8, 28, 20, 29, 20, 3, 29, 26, 27, 7, 26, 18, 27, 18, 2, 27, 24, 25, 6, 24, 14, 25, 14, 1, 25, 22, 23, 10, 22, 15, 23, 15, 5, 23, 16, 21, 5, 16, 19, 21, 19, 4, 21, 19, 20, 4, 19, 17, 20, 17, 3, 20, 17, 18, 3, 17, 12, 18, 12, 2, 18, 15, 16, 5, 15, 13, 16, 13, 0, 16, 12, 14, 2, 12, 13, 14, 13, 1, 14 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * icosphere_vertex_count * 3); + copymem(vertex_data.ptrw(), icosphere_vertices, vertex_data.size()); + + sphere_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * icosphere_triangle_count * 3); + copymem(index_data.ptrw(), icosphere_triangle_indices, index_data.size()); + + sphere_index_buffer = RD::get_singleton()->index_buffer_create(icosphere_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(sphere_vertex_buffer); + + sphere_vertex_array = RD::get_singleton()->vertex_array_create(icosphere_vertex_count, vertex_format, buffers); + + sphere_index_array = RD::get_singleton()->index_array_create(sphere_index_buffer, 0, icosphere_triangle_count * 3); + + float min_d = 1e20; + for (uint32_t i = 0; i < icosphere_triangle_count; i++) { + Vector3 vertices[3]; + for (uint32_t j = 0; j < 3; j++) { + uint32_t index = icosphere_triangle_indices[i * 3 + j]; + for (uint32_t k = 0; k < 3; k++) { + vertices[j][k] = icosphere_vertices[index * 3 + k]; + } + } + Plane p(vertices[0], vertices[1], vertices[2]); + min_d = MIN(Math::abs(p.d), min_d); + } + sphere_overfit = 1.0 / min_d; + } + + { // CONE + static const uint32_t cone_vertex_count = 99; + static const float cone_vertices[cone_vertex_count * 3] = { + 0, 1, -1, 0.1950903, 0.9807853, -1, 0.3826835, 0.9238795, -1, 0.5555703, 0.8314696, -1, 0.7071068, 0.7071068, -1, 0.8314697, 0.5555702, -1, 0.9238795, 0.3826834, -1, 0.9807853, 0.1950903, -1, 1, 0, -1, 0.9807853, -0.1950902, -1, 0.9238796, -0.3826833, -1, 0.8314697, -0.5555702, -1, 0.7071068, -0.7071068, -1, 0.5555702, -0.8314697, -1, 0.3826833, -0.9238796, -1, 0.1950901, -0.9807853, -1, -3.25841e-7, -1, -1, -0.1950907, -0.9807852, -1, -0.3826839, -0.9238793, -1, -0.5555707, -0.8314693, -1, -0.7071073, -0.7071063, -1, -0.83147, -0.5555697, -1, -0.9238799, -0.3826827, -1, 0, 0, 0, -0.9807854, -0.1950894, -1, -1, 9.65599e-7, -1, -0.9807851, 0.1950913, -1, -0.9238791, 0.3826845, -1, -0.8314689, 0.5555713, -1, -0.7071059, 0.7071077, -1, -0.5555691, 0.8314704, -1, -0.3826821, 0.9238801, -1, -0.1950888, 0.9807856, -1 + }; + static const uint32_t cone_triangle_count = 62; + static const uint32_t cone_triangle_indices[cone_triangle_count * 3] = { + 0, 23, 1, 1, 23, 2, 2, 23, 3, 3, 23, 4, 4, 23, 5, 5, 23, 6, 6, 23, 7, 7, 23, 8, 8, 23, 9, 9, 23, 10, 10, 23, 11, 11, 23, 12, 12, 23, 13, 13, 23, 14, 14, 23, 15, 15, 23, 16, 16, 23, 17, 17, 23, 18, 18, 23, 19, 19, 23, 20, 20, 23, 21, 21, 23, 22, 22, 23, 24, 24, 23, 25, 25, 23, 26, 26, 23, 27, 27, 23, 28, 28, 23, 29, 29, 23, 30, 30, 23, 31, 31, 23, 32, 32, 23, 0, 7, 15, 24, 32, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 3, 6, 7, 3, 7, 8, 9, 9, 10, 7, 10, 11, 7, 11, 12, 15, 12, 13, 15, 13, 14, 15, 15, 16, 17, 17, 18, 19, 19, 20, 24, 20, 21, 24, 21, 22, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 30, 31, 32, 32, 1, 3, 15, 17, 24, 17, 19, 24, 24, 26, 32, 26, 28, 32, 28, 30, 32, 32, 3, 7, 7, 11, 15, 32, 7, 24 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * cone_vertex_count * 3); + copymem(vertex_data.ptrw(), cone_vertices, vertex_data.size()); + + cone_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * cone_triangle_count * 3); + copymem(index_data.ptrw(), cone_triangle_indices, index_data.size()); + + cone_index_buffer = RD::get_singleton()->index_buffer_create(cone_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(cone_vertex_buffer); + + cone_vertex_array = RD::get_singleton()->vertex_array_create(cone_vertex_count, vertex_format, buffers); + + cone_index_array = RD::get_singleton()->index_array_create(cone_index_buffer, 0, cone_triangle_count * 3); + + float min_d = 1e20; + for (uint32_t i = 0; i < cone_triangle_count; i++) { + Vector3 vertices[3]; + int32_t zero_index = -1; + for (uint32_t j = 0; j < 3; j++) { + uint32_t index = cone_triangle_indices[i * 3 + j]; + for (uint32_t k = 0; k < 3; k++) { + vertices[j][k] = cone_vertices[index * 3 + k]; + } + if (vertices[j] == Vector3()) { + zero_index = j; + } + } + + if (zero_index != -1) { + Vector3 a = vertices[(zero_index + 1) % 3]; + Vector3 b = vertices[(zero_index + 2) % 3]; + Vector3 c = a + Vector3(0, 0, 1); + Plane p(a, b, c); + min_d = MIN(Math::abs(p.d), min_d); + } + } + cone_overfit = 1.0 / min_d; + } + + { // BOX + static const uint32_t box_vertex_count = 8; + static const float box_vertices[box_vertex_count * 3] = { + -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, -1, 1, 1, 1 + }; + static const uint32_t box_triangle_count = 12; + static const uint32_t box_triangle_indices[box_triangle_count * 3] = { + 1, 2, 0, 3, 6, 2, 7, 4, 6, 5, 0, 4, 6, 0, 2, 3, 5, 7, 1, 3, 2, 3, 7, 6, 7, 5, 4, 5, 1, 0, 6, 4, 0, 3, 1, 5 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * box_vertex_count * 3); + copymem(vertex_data.ptrw(), box_vertices, vertex_data.size()); + + box_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * box_triangle_count * 3); + copymem(index_data.ptrw(), box_triangle_indices, index_data.size()); + + box_index_buffer = RD::get_singleton()->index_buffer_create(box_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(box_vertex_buffer); + + box_vertex_array = RD::get_singleton()->vertex_array_create(box_vertex_count, vertex_format, buffers); + + box_index_array = RD::get_singleton()->index_array_create(box_index_buffer, 0, box_triangle_count * 3); + } +} +ClusterBuilderSharedDataRD::~ClusterBuilderSharedDataRD() { + RD::get_singleton()->free(sphere_vertex_buffer); + RD::get_singleton()->free(sphere_index_buffer); + RD::get_singleton()->free(cone_vertex_buffer); + RD::get_singleton()->free(cone_index_buffer); + RD::get_singleton()->free(box_vertex_buffer); + RD::get_singleton()->free(box_index_buffer); + + cluster_render.cluster_render_shader.version_free(cluster_render.shader_version); + cluster_store.cluster_store_shader.version_free(cluster_store.shader_version); + cluster_debug.cluster_debug_shader.version_free(cluster_debug.shader_version); +} + +///////////////////////////// + +void ClusterBuilderRD::_clear() { + if (cluster_buffer.is_null()) { + return; //nothing to clear + } + RD::get_singleton()->free(cluster_buffer); + RD::get_singleton()->free(cluster_render_buffer); + RD::get_singleton()->free(element_buffer); + cluster_buffer = RID(); + cluster_render_buffer = RID(); + element_buffer = RID(); + + memfree(render_elements); + + render_elements = nullptr; + render_element_max = 0; + render_element_count = 0; + + RD::get_singleton()->free(framebuffer); + framebuffer = RID(); + + cluster_render_uniform_set = RID(); + cluster_store_uniform_set = RID(); +} + +void ClusterBuilderRD::setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer) { + ERR_FAIL_COND(p_max_elements == 0); + ERR_FAIL_COND(p_screen_size.x < 1); + ERR_FAIL_COND(p_screen_size.y < 1); + + _clear(); + + screen_size = p_screen_size; + + cluster_screen_size.width = (p_screen_size.width - 1) / cluster_size + 1; + cluster_screen_size.height = (p_screen_size.height - 1) / cluster_size + 1; + + max_elements_by_type = p_max_elements; + if (max_elements_by_type % 32) { //need to be 32 aligned + max_elements_by_type += 32 - (max_elements_by_type % 32); + } + + cluster_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (max_elements_by_type / 32 + 32) * ELEMENT_TYPE_MAX * 4; + + render_element_max = max_elements_by_type * ELEMENT_TYPE_MAX; + + uint32_t element_tag_bits_size = render_element_max / 32; + uint32_t element_tag_depth_bits_size = render_element_max; + cluster_render_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (element_tag_bits_size + element_tag_depth_bits_size) * 4; // tag bits (element was used) and tag depth (depth range in which it was used) + + cluster_render_buffer = RD::get_singleton()->storage_buffer_create(cluster_render_buffer_size); + cluster_buffer = RD::get_singleton()->storage_buffer_create(cluster_buffer_size); + + render_elements = (RenderElementData *)memalloc(sizeof(RenderElementData *) * render_element_max); + render_element_count = 0; + + element_buffer = RD::get_singleton()->storage_buffer_create(sizeof(RenderElementData) * render_element_max); + + uint32_t div_value = 1 << divisor; + if (use_msaa) { + framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value, RD::TEXTURE_SAMPLES_4); + } else { + framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 1; + u.ids.push_back(state_uniform); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(element_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.ids.push_back(cluster_render_buffer); + uniforms.push_back(u); + } + + cluster_render_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_render.shader, 0); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(cluster_render_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(cluster_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.ids.push_back(element_buffer); + uniforms.push_back(u); + } + + cluster_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_store.shader, 0); + } + + if (p_color_buffer.is_valid()) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(cluster_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(p_color_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 3; + u.ids.push_back(p_depth_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 4; + u.ids.push_back(p_depth_buffer_sampler); + uniforms.push_back(u); + } + + debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_debug.shader, 0); + } else { + debug_uniform_set = RID(); + } +} + +void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y) { + view_xform = p_view_transform.affine_inverse(); + projection = p_cam_projection; + z_near = projection.get_z_near(); + z_far = projection.get_z_far(); + orthogonal = p_cam_projection.is_orthogonal(); + adjusted_projection = projection; + if (!orthogonal) { + adjusted_projection.adjust_perspective_znear(0.0001); + } + + CameraMatrix correction; + correction.set_depth_correction(p_flip_y); + projection = correction * projection; + adjusted_projection = correction * adjusted_projection; + + //reset counts + render_element_count = 0; + for (uint32_t i = 0; i < ELEMENT_TYPE_MAX; i++) { + cluster_count_by_type[i] = 0; + } +} + +void ClusterBuilderRD::bake_cluster() { + RENDER_TIMESTAMP(">Bake Cluster"); + + RD::get_singleton()->draw_command_begin_label("Bake Light Cluster"); + + //clear cluster buffer + RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + + if (render_element_count > 0) { + //clear render buffer + RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, RD::BARRIER_MASK_RASTER); + + { //fill state uniform + + StateUniform state; + + RendererStorageRD::store_camera(adjusted_projection, state.projection); + state.inv_z_far = 1.0 / z_far; + state.screen_to_clusters_shift = get_shift_from_power_of_2(cluster_size); + state.screen_to_clusters_shift -= divisor; //screen is smaller, shift one less + + state.cluster_screen_width = cluster_screen_size.x; + state.cluster_depth_offset = (render_element_max / 32); + state.cluster_data_size = state.cluster_depth_offset + render_element_max; + + RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + + //update instances + + RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + + RENDER_TIMESTAMP("Render Elements"); + + //render elements + { + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD); + ClusterBuilderSharedDataRD::ClusterRender::PushConstant push_constant = {}; + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shared->cluster_render.shader_pipelines[use_msaa ? ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_MSAA : ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_NORMAL]); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, cluster_render_uniform_set, 0); + + for (uint32_t i = 0; i < render_element_count;) { + push_constant.base_index = i; + switch (render_elements[i].type) { + case ELEMENT_TYPE_OMNI_LIGHT: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->sphere_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->sphere_index_array); + } break; + case ELEMENT_TYPE_SPOT_LIGHT: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->cone_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->cone_index_array); + } break; + case ELEMENT_TYPE_DECAL: + case ELEMENT_TYPE_REFLECTION_PROBE: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->box_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->box_index_array); + } break; + } + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterRender::PushConstant)); + + uint32_t instances = 1; +#if 0 + for (uint32_t j = i+1; j < element_count; j++) { + if (elements[i].type!=elements[j].type) { + break; + } + instances++; + } +#endif + RD::get_singleton()->draw_list_draw(draw_list, true, instances); + i += instances; + } + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE); + } + //store elements + RENDER_TIMESTAMP("Pack Elements"); + + { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_store.shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cluster_store_uniform_set, 0); + + ClusterBuilderSharedDataRD::ClusterStore::PushConstant push_constant; + push_constant.cluster_render_data_size = render_element_max / 32 + render_element_max; + push_constant.max_render_element_count_div_32 = render_element_max / 32; + push_constant.cluster_screen_size[0] = cluster_screen_size.x; + push_constant.cluster_screen_size[1] = cluster_screen_size.y; + push_constant.render_element_count_div_32 = render_element_count > 0 ? (render_element_count - 1) / 32 + 1 : 0; + push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32; + push_constant.pad1 = 0; + push_constant.pad2 = 0; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1); + + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + } else { + RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + RENDER_TIMESTAMP("<Bake Cluster"); + RD::get_singleton()->draw_command_end_label(); +} + +void ClusterBuilderRD::debug(ElementType p_element) { + ERR_FAIL_COND(debug_uniform_set.is_null()); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_debug.shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, debug_uniform_set, 0); + + ClusterBuilderSharedDataRD::ClusterDebug::PushConstant push_constant; + push_constant.screen_size[0] = screen_size.x; + push_constant.screen_size[1] = screen_size.y; + push_constant.cluster_screen_size[0] = cluster_screen_size.x; + push_constant.cluster_screen_size[1] = cluster_screen_size.y; + push_constant.cluster_shift = get_shift_from_power_of_2(cluster_size); + push_constant.cluster_type = p_element; + push_constant.orthogonal = orthogonal; + push_constant.z_far = z_far; + push_constant.z_near = z_near; + push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1); + + RD::get_singleton()->compute_list_end(); +} + +RID ClusterBuilderRD::get_cluster_buffer() const { + return cluster_buffer; +} + +uint32_t ClusterBuilderRD::get_cluster_size() const { + return cluster_size; +} + +uint32_t ClusterBuilderRD::get_max_cluster_elements() const { + return max_elements_by_type; +} + +void ClusterBuilderRD::set_shared(ClusterBuilderSharedDataRD *p_shared) { + shared = p_shared; +} + +ClusterBuilderRD::ClusterBuilderRD() { + state_uniform = RD::get_singleton()->uniform_buffer_create(sizeof(StateUniform)); +} + +ClusterBuilderRD::~ClusterBuilderRD() { + _clear(); + RD::get_singleton()->free(state_uniform); +} diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.h b/servers/rendering/renderer_rd/cluster_builder_rd.h new file mode 100644 index 0000000000..dc1707b534 --- /dev/null +++ b/servers/rendering/renderer_rd/cluster_builder_rd.h @@ -0,0 +1,378 @@ +/*************************************************************************/ +/* cluster_builder_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef CLUSTER_BUILDER_RD_H +#define CLUSTER_BUILDER_RD_H + +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h" + +class ClusterBuilderSharedDataRD { + friend class ClusterBuilderRD; + + RID sphere_vertex_buffer; + RID sphere_vertex_array; + RID sphere_index_buffer; + RID sphere_index_array; + float sphere_overfit = 0.0; //because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area + + RID cone_vertex_buffer; + RID cone_vertex_array; + RID cone_index_buffer; + RID cone_index_array; + float cone_overfit = 0.0; //because an cone mesh is not a perfect sphere, we need to enlarge it to cover the actual cone area + + RID box_vertex_buffer; + RID box_vertex_array; + RID box_index_buffer; + RID box_index_array; + + enum Divisor { + DIVISOR_1, + DIVISOR_2, + DIVISOR_4, + }; + + struct ClusterRender { + struct PushConstant { + uint32_t base_index; + uint32_t pad0; + uint32_t pad1; + uint32_t pad2; + }; + + ClusterRenderShaderRD cluster_render_shader; + RID shader_version; + RID shader; + enum PipelineVersion { + PIPELINE_NORMAL, + PIPELINE_MSAA, + PIPELINE_MAX + }; + + RID shader_pipelines[PIPELINE_MAX]; + } cluster_render; + + struct ClusterStore { + struct PushConstant { + uint32_t cluster_render_data_size; // how much data for a single cluster takes + uint32_t max_render_element_count_div_32; //divided by 32 + uint32_t cluster_screen_size[2]; + uint32_t render_element_count_div_32; //divided by 32 + uint32_t max_cluster_element_count_div_32; //divided by 32 + uint32_t pad1; + uint32_t pad2; + }; + + ClusterStoreShaderRD cluster_store_shader; + RID shader_version; + RID shader; + RID shader_pipeline; + } cluster_store; + + struct ClusterDebug { + struct PushConstant { + uint32_t screen_size[2]; + uint32_t cluster_screen_size[2]; + + uint32_t cluster_shift; + uint32_t cluster_type; + float z_near; + float z_far; + + uint32_t orthogonal; + uint32_t max_cluster_element_count_div_32; + uint32_t pad1; + uint32_t pad2; + }; + + ClusterDebugShaderRD cluster_debug_shader; + RID shader_version; + RID shader; + RID shader_pipeline; + } cluster_debug; + +public: + ClusterBuilderSharedDataRD(); + ~ClusterBuilderSharedDataRD(); +}; + +class ClusterBuilderRD { +public: + enum LightType { + LIGHT_TYPE_OMNI, + LIGHT_TYPE_SPOT + }; + + enum BoxType { + BOX_TYPE_REFLECTION_PROBE, + BOX_TYPE_DECAL, + }; + + enum ElementType { + ELEMENT_TYPE_OMNI_LIGHT, + ELEMENT_TYPE_SPOT_LIGHT, + ELEMENT_TYPE_DECAL, + ELEMENT_TYPE_REFLECTION_PROBE, + ELEMENT_TYPE_MAX, + + }; + +private: + ClusterBuilderSharedDataRD *shared = nullptr; + + struct RenderElementData { + uint32_t type; //0-4 + uint32_t touches_near; + uint32_t touches_far; + uint32_t original_index; + float transform_inv[12]; //transposed transform for less space + float scale[3]; + uint32_t pad; + }; + + uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {}; + uint32_t max_elements_by_type = 0; + + RenderElementData *render_elements = nullptr; + uint32_t render_element_count = 0; + uint32_t render_element_max = 0; + + Transform view_xform; + CameraMatrix adjusted_projection; + CameraMatrix projection; + float z_far = 0; + float z_near = 0; + bool orthogonal = false; + + enum Divisor { + DIVISOR_1, + DIVISOR_2, + DIVISOR_4, + }; + + uint32_t cluster_size = 32; + bool use_msaa = true; + Divisor divisor = DIVISOR_4; + + Size2i screen_size; + Size2i cluster_screen_size; + + RID framebuffer; + RID cluster_render_buffer; //used for creating + RID cluster_buffer; //used for rendering + RID element_buffer; //used for storing, to hint element touches far plane or near plane + uint32_t cluster_render_buffer_size = 0; + uint32_t cluster_buffer_size = 0; + + RID cluster_render_uniform_set; + RID cluster_store_uniform_set; + + //persistent data + + void _clear(); + + struct StateUniform { + float projection[16]; + float inv_z_far; + uint32_t screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint32_t cluster_screen_width; // + uint32_t cluster_data_size; // how much data for a single cluster takes + uint32_t cluster_depth_offset; + uint32_t pad0; + uint32_t pad1; + uint32_t pad2; + }; + + RID state_uniform; + + RID debug_uniform_set; + +public: + void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer); + + void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y); + + _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) { + if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) { + return; //max number elements reached + } + if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) { + return; //max number elements reached + } + + RenderElementData &e = render_elements[render_element_count]; + + Transform xform = view_xform * p_transform; + + float radius = xform.basis.get_uniform_scale(); + if (radius > 0.98 || radius < 1.02) { + xform.basis.orthonormalize(); + } + + radius *= p_radius; + + if (p_type == LIGHT_TYPE_OMNI) { + radius *= shared->sphere_overfit; // overfit icosphere + + //omni + float depth = -xform.origin.z; + if (orthogonal) { + e.touches_near = (depth - radius) < z_near; + } else { + //contains camera inside light + float radius2 = radius * shared->sphere_overfit; // overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex) + e.touches_near = xform.origin.length_squared() < radius2 * radius2; + } + + e.touches_far = (depth + radius) > z_far; + e.scale[0] = radius; + e.scale[1] = radius; + e.scale[2] = radius; + e.type = ELEMENT_TYPE_OMNI_LIGHT; + e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]; + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++; + + } else { + //spot + radius *= shared->cone_overfit; // overfit icosphere + + real_t len = Math::tan(Math::deg2rad(p_spot_aperture)) * radius; + //approximate, probably better to use a cone support function + float max_d = -1e20; + float min_d = 1e20; +#define CONE_MINMAX(m_x, m_y) \ + { \ + float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \ + min_d = MIN(d, min_d); \ + max_d = MAX(d, max_d); \ + } + + CONE_MINMAX(1, 1); + CONE_MINMAX(-1, 1); + CONE_MINMAX(-1, -1); + CONE_MINMAX(1, -1); + + if (orthogonal) { + e.touches_near = min_d < z_near; + } else { + //contains camera inside light + Plane base_plane(xform.origin, -xform.basis.get_axis(Vector3::AXIS_Z)); + float dist = base_plane.distance_to(Vector3()); + if (dist >= 0 && dist < radius) { + //inside, check angle + float angle = Math::rad2deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_axis(Vector3::AXIS_Z)))); + e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit + } else { + e.touches_near = false; + } + } + + e.touches_far = max_d > z_far; + + e.scale[0] = len * shared->cone_overfit; + e.scale[1] = len * shared->cone_overfit; + e.scale[2] = radius; + + e.type = ELEMENT_TYPE_SPOT_LIGHT; + e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; //use omni since they share index + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++; + } + + render_element_count++; + } + + _FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform &p_transform, const Vector3 &p_half_extents) { + if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) { + return; //max number elements reached + } + if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) { + return; //max number elements reached + } + + RenderElementData &e = render_elements[render_element_count]; + Transform xform = view_xform * p_transform; + + //extract scale and scale the matrix by it, makes things simpler + Vector3 scale = p_half_extents; + for (uint32_t i = 0; i < 3; i++) { + float s = xform.basis.elements[i].length(); + scale[i] *= s; + xform.basis.elements[i] /= s; + }; + + float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale)); + float depth = -xform.origin.z; + + if (orthogonal) { + e.touches_near = depth - box_depth < z_near; + } else { + //contains camera inside box + Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs(); + e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z; + } + + e.touches_far = depth + box_depth > z_far; + + e.scale[0] = scale.x; + e.scale[1] = scale.y; + e.scale[2] = scale.z; + + e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE; + e.original_index = cluster_count_by_type[e.type]; + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[e.type]++; + render_element_count++; + } + + void bake_cluster(); + void debug(ElementType p_element); + + RID get_cluster_buffer() const; + uint32_t get_cluster_size() const; + uint32_t get_max_cluster_elements() const; + + void set_shared(ClusterBuilderSharedDataRD *p_shared); + + ClusterBuilderRD(); + ~ClusterBuilderRD(); +}; + +#endif // CLUSTER_BUILDER_H diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index a73eb3782c..a9cadb40df 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -299,15 +299,12 @@ void EffectsRD::copy_to_rect(RID p_source_rd_texture, RID p_dest_texture, const copy.push_constant.target[0] = p_rect.position.x; copy.push_constant.target[1] = p_rect.position.y; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8_bit_dst ? COPY_MODE_SIMPLY_COPY_8BIT : COPY_MODE_SIMPLY_COPY]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -322,15 +319,12 @@ void EffectsRD::copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama, copy.push_constant.target[1] = 0; copy.push_constant.camera_z_far = p_lod; - int32_t x_groups = (p_panorama_size.width - 1) / 8 + 1; - int32_t y_groups = (p_panorama_size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_is_array ? COPY_MODE_CUBE_ARRAY_TO_PANORAMA : COPY_MODE_CUBE_TO_PANORAMA]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_cube), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_panorama), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_panorama_size.width, p_panorama_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -349,15 +343,12 @@ void EffectsRD::copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_ copy.push_constant.camera_z_far = p_z_far; copy.push_constant.camera_z_near = p_z_near; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_LINEARIZE_DEPTH]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -374,15 +365,12 @@ void EffectsRD::copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_texture, copy.push_constant.target[0] = p_rect.position.x; copy.push_constant.target[1] = p_rect.position.y; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_SIMPLY_COPY_DEPTH]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -400,14 +388,11 @@ void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i copy.push_constant.set_color[2] = p_color.b; copy.push_constant.set_color[3] = p_color.a; - int32_t x_groups = (p_region.size.width - 1) / 8 + 1; - int32_t y_groups = (p_region.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_SET_COLOR_8BIT : COPY_MODE_SET_COLOR]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -420,8 +405,6 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.section[2] = p_region.size.width; copy.push_constant.section[3] = p_region.size.height; - int32_t x_groups = (p_region.size.width - 1) / 8 + 1; - int32_t y_groups = (p_region.size.height - 1) / 8 + 1; //HORIZONTAL RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]); @@ -431,7 +414,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL; RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -442,7 +425,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.flags = base_flags; RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -452,9 +435,6 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const CopyMode copy_mode = p_first_pass && p_auto_exposure.is_valid() ? COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE : COPY_MODE_GAUSSIAN_GLOW; uint32_t base_flags = 0; - int32_t x_groups = (p_size.width + 7) / 8; - int32_t y_groups = (p_size.height + 7) / 8; - copy.push_constant.section[2] = p_size.x; copy.push_constant.section[3] = p_size.y; @@ -479,16 +459,13 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const copy.push_constant.flags = base_flags | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0) | (p_high_quality ? COPY_FLAG_HIGH_QUALITY_GLOW : 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); RD::get_singleton()->compute_list_end(); } void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - int32_t x_groups = (p_screen_size.width - 1) / 8 + 1; - int32_t y_groups = (p_screen_size.height - 1) / 8 + 1; - { //scale color and depth to half ssr_scale.push_constant.camera_z_far = p_camera.get_z_far(); ssr_scale.push_constant.camera_z_near = p_camera.get_z_near(); @@ -506,7 +483,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_scale.push_constant, sizeof(ScreenSpaceReflectionScalePushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); } @@ -547,7 +524,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R } RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 2); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); } if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) { @@ -585,7 +562,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -600,7 +577,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); } RD::get_singleton()->compute_list_end(); @@ -609,9 +586,6 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RenderingServer::SubSurfaceScatteringQuality p_quality) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - int32_t x_groups = (p_screen_size.width - 1) / 8 + 1; - int32_t y_groups = (p_screen_size.height - 1) / 8 + 1; - Plane p = p_camera.xform4(Plane(1, 0, -1, 1)); p.normal /= p.d; float unit_size = p.normal.x; @@ -635,7 +609,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -646,7 +620,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept sss.push_constant.vertical = true; RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -690,39 +664,33 @@ void EffectsRD::make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const S copy.push_constant.section[2] = p_size.width; copy.push_constant.section[3] = p_size.height; - int32_t x_groups = (p_size.width - 1) / 8 + 1; - int32_t y_groups = (p_size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_MIPMAP]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); RD::get_singleton()->compute_list_end(); } -void EffectsRD::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, float p_z_near, float p_z_far, float p_bias, bool p_dp_flip) { +void EffectsRD::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, float p_z_near, float p_z_far, bool p_dp_flip) { CopyToDPPushConstant push_constant; - push_constant.screen_size[0] = p_rect.size.x; - push_constant.screen_size[1] = p_rect.size.y; - push_constant.dest_offset[0] = p_rect.position.x; - push_constant.dest_offset[1] = p_rect.position.y; - push_constant.bias = p_bias; + push_constant.screen_rect[0] = p_rect.position.x; + push_constant.screen_rect[1] = p_rect.position.y; + push_constant.screen_rect[2] = p_rect.size.width; + push_constant.screen_rect[3] = p_rect.size.height; push_constant.z_far = p_z_far; push_constant.z_near = p_z_near; push_constant.z_flip = p_dp_flip; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, cube_to_dp.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dst_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, cube_to_dp.pipeline); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(CopyToDPPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant)); + RD::get_singleton()->draw_list_draw(draw_list, true); + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER); } void EffectsRD::tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings) { @@ -807,10 +775,7 @@ void EffectsRD::luminance_reduction(RID p_source_texture, const Size2i p_source_ RD::get_singleton()->compute_list_set_push_constant(compute_list, &luminance_reduce.push_constant, sizeof(LuminanceReducePushConstant)); - int32_t x_groups = (luminance_reduce.push_constant.source_size[0] - 1) / 8 + 1; - int32_t y_groups = (luminance_reduce.push_constant.source_size[1] - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, luminance_reduce.push_constant.source_size[0], luminance_reduce.push_constant.source_size[1], 1); luminance_reduce.push_constant.source_size[0] = MAX(luminance_reduce.push_constant.source_size[0] / 8, 1); luminance_reduce.push_constant.source_size[1] = MAX(luminance_reduce.push_constant.source_size[1] / 8, 1); @@ -851,14 +816,12 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_texture), 1); - int32_t x_groups = (p_base_texture_size.x - 1) / 8 + 1; - int32_t y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); if (p_bokeh_shape == RS::DOF_BOKEH_BOX || p_bokeh_shape == RS::DOF_BOKEH_HEXAGON) { @@ -875,8 +838,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); - x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1; - y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.half_size = true; @@ -890,7 +851,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //third pass @@ -906,7 +867,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || p_quality == RS::DOF_BLUR_QUALITY_LOW) { @@ -917,8 +878,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture2), 1); - x_groups = (p_base_texture_size.x - 1) / 8 + 1; - y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.half_size = false; @@ -926,7 +885,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); } } else { //circle @@ -944,15 +903,13 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); - x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1; - y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.half_size = true; RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //circle is just one pass, then upscale @@ -964,8 +921,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture1), 1); - x_groups = (p_base_texture_size.x - 1) / 8 + 1; - y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.half_size = false; @@ -973,7 +928,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); } RD::get_singleton()->compute_list_end(); @@ -992,47 +947,47 @@ void EffectsRD::gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID> ssao.gather_push_constant.pass_coord_offset[0] = i % 2; ssao.gather_push_constant.pass_coord_offset[1] = i / 2; - ssao.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.screen_size.x; - ssao.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.screen_size.y; + ssao.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.full_screen_size.x; + ssao.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.full_screen_size.y; ssao.gather_push_constant.pass = i; RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2); RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); - int x_groups = ((p_settings.screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = ((p_settings.screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); - RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); } RD::get_singleton()->compute_list_add_barrier(p_compute_list); } -void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao, const Vector<RID> p_ao_slices, RID p_ao_pong, const Vector<RID> p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets) { +void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_depth_mipmaps_texture, const Vector<RID> &p_depth_mipmaps, RID p_ao, const Vector<RID> p_ao_slices, RID p_ao_pong, const Vector<RID> p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - + RD::get_singleton()->draw_command_begin_label("SSAO"); /* FIRST PASS */ // Downsample and deinterleave the depth buffer. { + RD::get_singleton()->draw_command_begin_label("Downsample Depth"); if (p_invalidate_uniform_sets) { Vector<RD::Uniform> uniforms; { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; u.binding = 0; - u.ids.push_back(depth_mipmaps[1]); + u.ids.push_back(p_depth_mipmaps[1]); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; u.binding = 1; - u.ids.push_back(depth_mipmaps[2]); + u.ids.push_back(p_depth_mipmaps[2]); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; u.binding = 2; - u.ids.push_back(depth_mipmaps[3]); + u.ids.push_back(p_depth_mipmaps[3]); uniforms.push_back(u); } ssao.downsample_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.downsample_shader.version_get_shader(ssao.downsample_shader_version, 2), 2); @@ -1051,8 +1006,8 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep ssao.downsample_push_constant.z_near = p_projection.get_z_near(); ssao.downsample_push_constant.z_far = p_projection.get_z_far(); } - ssao.downsample_push_constant.pixel_size[0] = 1.0 / p_settings.screen_size.x; - ssao.downsample_push_constant.pixel_size[1] = 1.0 / p_settings.screen_size.y; + ssao.downsample_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; + ssao.downsample_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; ssao.downsample_push_constant.radius_sq = p_settings.radius * p_settings.radius; int downsample_pipeline = SSAO_DOWNSAMPLE; @@ -1068,24 +1023,25 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[downsample_pipeline]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[0]), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_depth_mipmaps[0]), 1); if (p_settings.quality > RS::ENV_SSAO_QUALITY_MEDIUM) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.downsample_uniform_set, 2); } RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample_push_constant, sizeof(SSAODownsamplePushConstant)); - int x_groups = (MAX(1, p_settings.screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = (MAX(1, p_settings.screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + Size2i size(MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)), MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1))); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); // Downsample SSAO } /* SECOND PASS */ // Sample SSAO { - ssao.gather_push_constant.screen_size[0] = p_settings.screen_size.x; - ssao.gather_push_constant.screen_size[1] = p_settings.screen_size.y; + RD::get_singleton()->draw_command_begin_label("Gather Samples"); + ssao.gather_push_constant.screen_size[0] = p_settings.full_screen_size.x; + ssao.gather_push_constant.screen_size[1] = p_settings.full_screen_size.y; ssao.gather_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; ssao.gather_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; @@ -1122,7 +1078,7 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep ssao.gather_push_constant.inv_radius_near_limit = 1.0f / radius_near_limit; ssao.gather_push_constant.neg_inv_radius = -1.0 / ssao.gather_push_constant.radius; - ssao.gather_push_constant.load_counter_avg_div = 9.0 / float((p_settings.quarter_size.x) * (p_settings.quarter_size.y) * 255); + ssao.gather_push_constant.load_counter_avg_div = 9.0 / float((p_settings.quarter_screen_size.x) * (p_settings.quarter_screen_size.y) * 255); ssao.gather_push_constant.adaptive_sample_limit = p_settings.adaptive_target; ssao.gather_push_constant.detail_intensity = p_settings.detail; @@ -1184,6 +1140,7 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep } if (p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) { + RD::get_singleton()->draw_command_begin_label("Generate Importance Map"); ssao.importance_map_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; ssao.importance_map_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; ssao.importance_map_push_constant.intensity = p_settings.intensity; @@ -1192,21 +1149,19 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]); gather_ssao(compute_list, p_ao_pong_slices, p_settings, true); //generate importance map - int x_groups = (p_settings.quarter_size.x - 1) / 8 + 1; - int y_groups = (p_settings.quarter_size.y - 1) / 8 + 1; RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //process importance map A RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //process Importance Map B RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); @@ -1214,21 +1169,24 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); + RD::get_singleton()->draw_command_end_label(); // Importance Map } else { RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER]); } gather_ssao(compute_list, p_ao_slices, p_settings, false); + RD::get_singleton()->draw_command_end_label(); // Gather SSAO } // /* THIRD PASS */ // // Blur // { + RD::get_singleton()->draw_command_begin_label("Edge Aware Blur"); ssao.blur_push_constant.edge_sharpness = 1.0 - p_settings.sharpness; ssao.blur_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; ssao.blur_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; @@ -1268,25 +1226,25 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep } RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); - int x_groups = ((p_settings.screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = ((p_settings.screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + Size2i size(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); } if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { RD::get_singleton()->compute_list_add_barrier(compute_list); } } + RD::get_singleton()->draw_command_end_label(); // Blur } /* FOURTH PASS */ // Interleave buffers // back to full size { + RD::get_singleton()->draw_command_begin_label("Interleave Buffers"); ssao.interleave_push_constant.inv_sharpness = 1.0 - p_settings.sharpness; - ssao.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.screen_size.x; - ssao.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.screen_size.y; + ssao.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; + ssao.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; ssao.interleave_push_constant.size_modifier = uint32_t(p_settings.half_size ? 4 : 2); int interleave_pipeline = SSAO_INTERLEAVE_HALF; @@ -1307,17 +1265,15 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); - int x_groups = (p_settings.screen_size.x - 1) / 8 + 1; - int y_groups = (p_settings.screen_size.y - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); // Interleave } - - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->draw_command_end_label(); //SSAO + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER); //wait for upcoming transfer int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, false); + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier } void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { @@ -1330,12 +1286,9 @@ void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_normal), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_roughness), 1); - int x_groups = (p_size.x - 1) / 8 + 1; - int y_groups = (p_size.y - 1) / 8 + 1; - RD::get_singleton()->compute_list_set_push_constant(compute_list, &roughness_limiter.push_constant, sizeof(RoughnessLimiterPushConstant)); //not used but set anyway - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.x, p_size.y, 1); RD::get_singleton()->compute_list_end(); } @@ -1448,7 +1401,7 @@ void EffectsRD::render_sky(RD::DrawListID p_list, float p_time, RID p_fb, RID p_ RD::get_singleton()->draw_list_draw(draw_list, true); } -void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples) { +void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { ResolvePushConstant push_constant; push_constant.screen_size[0] = p_screen_size.x; push_constant.screen_size[1] = p_screen_size.y; @@ -1465,54 +1418,9 @@ void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RI RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); - RD::get_singleton()->compute_list_end(); -} - -void EffectsRD::reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RD::ComputeListID compute_list) { - uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, (uint32_t)p_shrink_limit, 0, 0, 0 }; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_REDUCE]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_source_shadow, p_dest_shadow), 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); -} -void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RenderingServer::EnvVolumetricFogShadowFilter p_filter, RD::ComputeListID compute_list, bool p_vertical, bool p_horizontal) { - uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, 0, 0, 0, 0 }; - - switch (p_filter) { - case RS::ENV_VOLUMETRIC_FOG_SHADOW_FILTER_DISABLED: - case RS::ENV_VOLUMETRIC_FOG_SHADOW_FILTER_LOW: { - push_constant[5] = 0; - } break; - case RS::ENV_VOLUMETRIC_FOG_SHADOW_FILTER_MEDIUM: { - push_constant[5] = 9; - } break; - case RS::ENV_VOLUMETRIC_FOG_SHADOW_FILTER_HIGH: { - push_constant[5] = 18; - } break; - } - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_FILTER]); - if (p_vertical) { - push_constant[6] = 1; - push_constant[7] = 0; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_shadow, p_backing_shadow), 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); - } - if (p_vertical && p_horizontal) { - RD::get_singleton()->compute_list_add_barrier(compute_list); - } - if (p_horizontal) { - push_constant[6] = 0; - push_constant[7] = 1; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_backing_shadow, p_shadow), 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); - } + RD::get_singleton()->compute_list_end(p_barrier); } void EffectsRD::sort_buffer(RID p_uniform_set, int p_size) { @@ -1678,8 +1586,12 @@ EffectsRD::EffectsRD() { cube_to_dp.shader.initialize(copy_modes); cube_to_dp.shader_version = cube_to_dp.shader.version_create(); - - cube_to_dp.pipeline = RD::get_singleton()->compute_pipeline_create(cube_to_dp.shader.version_get_shader(cube_to_dp.shader_version, 0)); + RID shader = cube_to_dp.shader.version_get_shader(cube_to_dp.shader_version, 0); + RD::PipelineDepthStencilState dss; + dss.enable_depth_test = true; + dss.depth_compare_operator = RD::COMPARE_OP_ALWAYS; + dss.enable_depth_write = true; + cube_to_dp.pipeline.setup(shader, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), dss, RD::PipelineColorBlendState(), 0); } { @@ -1776,7 +1688,7 @@ EffectsRD::EffectsRD() { } } - RD::get_singleton()->buffer_update(ssao.gather_constants_buffer, 0, sizeof(SSAOGatherConstants), &gather_constants, false); + RD::get_singleton()->buffer_update(ssao.gather_constants_buffer, 0, sizeof(SSAOGatherConstants), &gather_constants); } { Vector<String> ssao_modes; @@ -1795,7 +1707,8 @@ EffectsRD::EffectsRD() { } ssao.importance_map_load_counter = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, false); + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); + RD::get_singleton()->set_resource_name(ssao.importance_map_load_counter, "Importance Map Load Counter"); Vector<RD::Uniform> uniforms; { @@ -1806,6 +1719,7 @@ EffectsRD::EffectsRD() { uniforms.push_back(u); } ssao.counter_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, 2), 2); + RD::get_singleton()->set_resource_name(ssao.counter_uniform_set, "Load Counter Uniform Set"); } { Vector<String> ssao_modes; @@ -1834,7 +1748,7 @@ EffectsRD::EffectsRD() { ssao.interleave_shader_version = ssao.interleave_shader.version_create(); for (int i = SSAO_INTERLEAVE; i <= SSAO_INTERLEAVE_HALF; i++) { ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.interleave_shader.version_get_shader(ssao.interleave_shader_version, i - SSAO_INTERLEAVE)); - + RD::get_singleton()->set_resource_name(ssao.pipelines[pipeline], "Interleave Pipeline " + itos(i)); pipeline++; } } @@ -1883,10 +1797,10 @@ EffectsRD::EffectsRD() { if (filter.use_high_quality) { filter.coefficient_buffer = RD::get_singleton()->storage_buffer_create(sizeof(high_quality_coeffs)); - RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(high_quality_coeffs), &high_quality_coeffs[0], false); + RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(high_quality_coeffs), &high_quality_coeffs[0]); } else { filter.coefficient_buffer = RD::get_singleton()->storage_buffer_create(sizeof(low_quality_coeffs)); - RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(low_quality_coeffs), &low_quality_coeffs[0], false); + RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(low_quality_coeffs), &low_quality_coeffs[0]); } Vector<RD::Uniform> uniforms; @@ -2005,20 +1919,6 @@ EffectsRD::EffectsRD() { } { - Vector<String> shadow_reduce_modes; - shadow_reduce_modes.push_back("\n#define MODE_REDUCE\n"); - shadow_reduce_modes.push_back("\n#define MODE_FILTER\n"); - - shadow_reduce.shader.initialize(shadow_reduce_modes); - - shadow_reduce.shader_version = shadow_reduce.shader.version_create(); - - for (int i = 0; i < SHADOW_REDUCE_MAX; i++) { - shadow_reduce.pipelines[i] = RD::get_singleton()->compute_pipeline_create(shadow_reduce.shader.version_get_shader(shadow_reduce.shader_version, i)); - } - } - - { Vector<String> sort_modes; sort_modes.push_back("\n#define MODE_SORT_BLOCK\n"); sort_modes.push_back("\n#define MODE_SORT_STEP\n"); @@ -2039,12 +1939,14 @@ EffectsRD::EffectsRD() { sampler.max_lod = 0; default_sampler = RD::get_singleton()->sampler_create(sampler); + RD::get_singleton()->set_resource_name(default_sampler, "Default Linear Sampler"); sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; sampler.mip_filter = RD::SAMPLER_FILTER_LINEAR; sampler.max_lod = 1e20; default_mipmap_sampler = RD::get_singleton()->sampler_create(sampler); + RD::get_singleton()->set_resource_name(default_mipmap_sampler, "Default MipMap Sampler"); { //create index array for copy shaders Vector<uint8_t> pv; @@ -2104,5 +2006,4 @@ EffectsRD::~EffectsRD() { ssr_scale.shader.version_free(ssr_scale.shader_version); sss.shader.version_free(sss.shader_version); tonemap.shader.version_free(tonemap.shader_version); - shadow_reduce.shader.version_free(shadow_reduce.shader_version); } diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index ad4a660944..1ba25e301b 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -46,7 +46,6 @@ #include "servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/shadow_reduce.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/sort.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/specular_merge.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/ssao.glsl.gen.h" @@ -234,18 +233,17 @@ class EffectsRD { } luminance_reduce; struct CopyToDPPushConstant { - int32_t screen_size[2]; - int32_t dest_offset[2]; - float bias; float z_far; float z_near; uint32_t z_flip; + uint32_t pad; + float screen_rect[4]; }; struct CoptToDP { CubeToDpShaderRD shader; RID shader_version; - RID pipeline; + PipelineCacheRD pipeline; } cube_to_dp; struct BokehPushConstant { @@ -598,18 +596,6 @@ class EffectsRD { RID pipelines[RESOLVE_MODE_MAX]; //3 quality levels } resolve; - enum ShadowReduceMode { - SHADOW_REDUCE_REDUCE, - SHADOW_REDUCE_FILTER, - SHADOW_REDUCE_MAX - }; - - struct ShadowReduce { - ShadowReduceShaderRD shader; - RID shader_version; - RID pipelines[SHADOW_REDUCE_MAX]; - } shadow_reduce; - enum SortMode { SORT_MODE_BLOCK, SORT_MODE_STEP, @@ -687,7 +673,7 @@ public: void cubemap_roughness(RID p_source_rd_texture, RID p_dest_framebuffer, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size); void make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const Size2i &p_size); - void copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, float p_z_near, float p_z_far, float p_bias, bool p_dp_flip); + void copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dest_texture, const Rect2 &p_rect, float p_z_near, float p_z_far, bool p_dp_flip); void luminance_reduction(RID p_source_texture, const Size2i p_source_size, const Vector<RID> p_reduce, RID p_prev_luminance, float p_min_luminance, float p_max_luminance, float p_adjust, bool p_set = false); void bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i &p_base_texture_size, RID p_secondary_texture, RID p_bokeh_texture1, RID p_bokeh_texture2, bool p_dof_far, float p_dof_far_begin, float p_dof_far_size, bool p_dof_near, float p_dof_near_begin, float p_dof_near_size, float p_bokeh_size, RS::DOFBokehShape p_bokeh_shape, RS::DOFBlurQuality p_quality, bool p_use_jitter, float p_cam_znear, float p_cam_zfar, bool p_cam_orthogonal); @@ -745,9 +731,9 @@ public: float fadeout_from = 50.0; float fadeout_to = 300.0; - Size2i screen_size = Size2i(); + Size2i full_screen_size = Size2i(); Size2i half_screen_size = Size2i(); - Size2i quarter_size = Size2i(); + Size2i quarter_screen_size = Size2i(); }; void tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings); @@ -764,10 +750,7 @@ public: void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection); void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); - void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples); - - void reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RenderingDevice::ComputeListID compute_list); - void filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RS::EnvVolumetricFogShadowFilter p_filter, RenderingDevice::ComputeListID compute_list, bool p_vertical = true, bool p_horizontal = true); + void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); void sort_buffer(RID p_uniform_set, int p_size); diff --git a/servers/rendering/renderer_rd/light_cluster_builder.cpp b/servers/rendering/renderer_rd/light_cluster_builder.cpp deleted file mode 100644 index bb807ca4ca..0000000000 --- a/servers/rendering/renderer_rd/light_cluster_builder.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/*************************************************************************/ -/* light_cluster_builder.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "light_cluster_builder.h" - -void LightClusterBuilder::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection) { - view_xform = p_view_transform; - projection = p_cam_projection; - z_near = -projection.get_z_near(); - z_far = -projection.get_z_far(); - - //reset counts - light_count = 0; - refprobe_count = 0; - decal_count = 0; - item_count = 0; - sort_id_count = 0; -} - -void LightClusterBuilder::bake_cluster() { - float slice_depth = (z_near - z_far) / depth; - - uint8_t *cluster_dataw = cluster_data.ptrw(); - Cell *cluster_data_ptr = (Cell *)cluster_dataw; - //clear the cluster - zeromem(cluster_data_ptr, (width * height * depth * sizeof(Cell))); - - /* Step 1, create cell positions and count them */ - - for (uint32_t i = 0; i < item_count; i++) { - const Item &item = items[i]; - - int from_slice = Math::floor((z_near - (item.aabb.position.z + item.aabb.size.z)) / slice_depth); - int to_slice = Math::floor((z_near - item.aabb.position.z) / slice_depth); - - if (from_slice >= (int)depth || to_slice < 0) { - continue; //sorry no go - } - - from_slice = MAX(0, from_slice); - to_slice = MIN((int)depth - 1, to_slice); - - for (int j = from_slice; j <= to_slice; j++) { - Vector3 min = item.aabb.position; - Vector3 max = item.aabb.position + item.aabb.size; - - float limit_near = MIN((z_near - slice_depth * j), max.z); - float limit_far = MAX((z_near - slice_depth * (j + 1)), min.z); - - max.z = limit_near; - min.z = limit_near; - - Vector3 proj_min = projection.xform(min); - Vector3 proj_max = projection.xform(max); - - int near_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width)); - int near_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height)); - int near_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width)); - int near_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height)); - - max.z = limit_far; - min.z = limit_far; - - proj_min = projection.xform(min); - proj_max = projection.xform(max); - - int far_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width)); - int far_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height)); - int far_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width)); - int far_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height)); - - //print_line(itos(j) + " near - " + Vector2i(near_from_x, near_from_y) + " -> " + Vector2i(near_to_x, near_to_y)); - //print_line(itos(j) + " far - " + Vector2i(far_from_x, far_from_y) + " -> " + Vector2i(far_to_x, far_to_y)); - - int from_x = MIN(near_from_x, far_from_x); - int from_y = MIN(near_from_y, far_from_y); - int to_x = MAX(near_to_x, far_to_x); - int to_y = MAX(near_to_y, far_to_y); - - if (from_x >= (int)width || to_x < 0 || from_y >= (int)height || to_y < 0) { - continue; - } - - int sx = MAX(0, from_x); - int sy = MAX(0, from_y); - int dx = MIN((int)width - 1, to_x); - int dy = MIN((int)height - 1, to_y); - - //print_line(itos(j) + " - " + Vector2i(sx, sy) + " -> " + Vector2i(dx, dy)); - - for (int x = sx; x <= dx; x++) { - for (int y = sy; y <= dy; y++) { - uint32_t offset = j * (width * height) + y * width + x; - - if (unlikely(sort_id_count == sort_id_max)) { - sort_id_max = nearest_power_of_2_templated(sort_id_max + 1); - sort_ids = (SortID *)memrealloc(sort_ids, sizeof(SortID) * sort_id_max); - if (ids.size()) { - ids.resize(sort_id_max); - RD::get_singleton()->free(items_buffer); - items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * sort_id_max); - } - } - - sort_ids[sort_id_count].cell_index = offset; - sort_ids[sort_id_count].item_index = item.index; - sort_ids[sort_id_count].item_type = item.type; - - sort_id_count++; - - //for now, only count - cluster_data_ptr[offset].item_pointers[item.type]++; - //print_line("at offset " + itos(offset) + " value: " + itos(cluster_data_ptr[offset].item_pointers[item.type])); - } - } - } - } - - /* Step 2, Assign pointers (and reset counters) */ - - uint32_t offset = 0; - for (uint32_t i = 0; i < (width * height * depth); i++) { - for (int j = 0; j < ITEM_TYPE_MAX; j++) { - uint32_t count = cluster_data_ptr[i].item_pointers[j]; //save count - cluster_data_ptr[i].item_pointers[j] = offset; //replace count by pointer - offset += count; //increase offset by count; - } - } - - //print_line("offset: " + itos(offset)); - /* Step 3, Place item lists */ - - uint32_t *ids_ptr = ids.ptrw(); - - for (uint32_t i = 0; i < sort_id_count; i++) { - const SortID &id = sort_ids[i]; - Cell &cell = cluster_data_ptr[id.cell_index]; - uint32_t pointer = cell.item_pointers[id.item_type] & POINTER_MASK; - uint32_t counter = cell.item_pointers[id.item_type] >> COUNTER_SHIFT; - ids_ptr[pointer + counter] = id.item_index; - - cell.item_pointers[id.item_type] = pointer | ((counter + 1) << COUNTER_SHIFT); - } - - RD::get_singleton()->texture_update(cluster_texture, 0, cluster_data, true); - RD::get_singleton()->buffer_update(items_buffer, 0, offset * sizeof(uint32_t), ids_ptr, true); -} - -void LightClusterBuilder::setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth) { - if (width == p_width && height == p_height && depth == p_depth) { - return; - } - if (cluster_texture.is_valid()) { - RD::get_singleton()->free(cluster_texture); - } - - width = p_width; - height = p_height; - depth = p_depth; - - cluster_data.resize(width * height * depth * sizeof(Cell)); - - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32G32B32A32_UINT; - tf.texture_type = RD::TEXTURE_TYPE_3D; - tf.width = width; - tf.height = height; - tf.depth = depth; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; - - cluster_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } -} - -RID LightClusterBuilder::get_cluster_texture() const { - return cluster_texture; -} - -RID LightClusterBuilder::get_cluster_indices_buffer() const { - return items_buffer; -} - -LightClusterBuilder::LightClusterBuilder() { - //initialize accumulators to something - lights = (LightData *)memalloc(sizeof(LightData) * 1024); - light_max = 1024; - - refprobes = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * 1024); - refprobe_max = 1024; - - decals = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * 1024); - decal_max = 1024; - - items = (Item *)memalloc(sizeof(Item) * 1024); - item_max = 1024; - - sort_ids = (SortID *)memalloc(sizeof(SortID) * 1024); - ids.resize(2014); - items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 1024); - item_max = 1024; -} - -LightClusterBuilder::~LightClusterBuilder() { - if (cluster_data.size()) { - RD::get_singleton()->free(cluster_texture); - } - - if (lights) { - memfree(lights); - } - if (refprobes) { - memfree(refprobes); - } - if (decals) { - memfree(decals); - } - if (items) { - memfree(items); - } - if (sort_ids) { - memfree(sort_ids); - RD::get_singleton()->free(items_buffer); - } -} diff --git a/servers/rendering/renderer_rd/light_cluster_builder.h b/servers/rendering/renderer_rd/light_cluster_builder.h deleted file mode 100644 index 8f77ece6f5..0000000000 --- a/servers/rendering/renderer_rd/light_cluster_builder.h +++ /dev/null @@ -1,290 +0,0 @@ -/*************************************************************************/ -/* light_cluster_builder.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef LIGHT_CLUSTER_BUILDER_H -#define LIGHT_CLUSTER_BUILDER_H - -#include "servers/rendering/renderer_rd/renderer_storage_rd.h" - -class LightClusterBuilder { -public: - enum LightType { - LIGHT_TYPE_OMNI, - LIGHT_TYPE_SPOT - }; - - enum ItemType { - ITEM_TYPE_OMNI_LIGHT, - ITEM_TYPE_SPOT_LIGHT, - ITEM_TYPE_REFLECTION_PROBE, - ITEM_TYPE_DECAL, - ITEM_TYPE_MAX //should always be 4 - }; - - enum { - COUNTER_SHIFT = 20, //one million total ids - POINTER_MASK = (1 << COUNTER_SHIFT) - 1, - COUNTER_MASK = 0xfff // 4096 items per cell - }; - -private: - struct LightData { - float position[3]; - uint32_t type; - float radius; - float spot_aperture; - uint32_t pad[2]; - }; - - uint32_t light_count = 0; - uint32_t light_max = 0; - LightData *lights = nullptr; - - struct OrientedBoxData { - float position[3]; - uint32_t pad; - float x_axis[3]; - uint32_t pad2; - float y_axis[3]; - uint32_t pad3; - float z_axis[3]; - uint32_t pad4; - }; - - uint32_t refprobe_count = 0; - uint32_t refprobe_max = 0; - OrientedBoxData *refprobes = nullptr; - - uint32_t decal_count = 0; - uint32_t decal_max = 0; - OrientedBoxData *decals = nullptr; - - struct Item { - AABB aabb; - ItemType type; - uint32_t index; - }; - - Item *items = nullptr; - uint32_t item_count = 0; - uint32_t item_max = 0; - - uint32_t width = 0; - uint32_t height = 0; - uint32_t depth = 0; - - struct Cell { - uint32_t item_pointers[ITEM_TYPE_MAX]; - }; - - Vector<uint8_t> cluster_data; - RID cluster_texture; - - struct SortID { - uint32_t cell_index; - uint32_t item_index; - ItemType item_type; - }; - - SortID *sort_ids = nullptr; - Vector<uint32_t> ids; - uint32_t sort_id_count = 0; - uint32_t sort_id_max = 0; - RID items_buffer; - - Transform view_xform; - CameraMatrix projection; - float z_far = 0; - float z_near = 0; - - _FORCE_INLINE_ void _add_item(const AABB &p_aabb, ItemType p_type, uint32_t p_index) { - if (unlikely(item_count == item_max)) { - item_max = nearest_power_of_2_templated(item_max + 1); - items = (Item *)memrealloc(items, sizeof(Item) * item_max); - } - - Item &item = items[item_count]; - item.aabb = p_aabb; - item.index = p_index; - item.type = p_type; - item_count++; - } - -public: - void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection); - - _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) { - if (unlikely(light_count == light_max)) { - light_max = nearest_power_of_2_templated(light_max + 1); - lights = (LightData *)memrealloc(lights, sizeof(LightData) * light_max); - } - - LightData &ld = lights[light_count]; - ld.type = p_type; - ld.position[0] = p_transform.origin.x; - ld.position[1] = p_transform.origin.y; - ld.position[2] = p_transform.origin.z; - ld.radius = p_radius; - ld.spot_aperture = p_spot_aperture; - - Transform xform = view_xform * p_transform; - - ld.radius *= xform.basis.get_uniform_scale(); - - AABB aabb; - - switch (p_type) { - case LIGHT_TYPE_OMNI: { - aabb.position = xform.origin; - aabb.size = Vector3(ld.radius, ld.radius, ld.radius); - aabb.position -= aabb.size; - aabb.size *= 2.0; - - _add_item(aabb, ITEM_TYPE_OMNI_LIGHT, light_count); - } break; - case LIGHT_TYPE_SPOT: { - float r = ld.radius; - real_t len = Math::tan(Math::deg2rad(ld.spot_aperture)) * r; - - aabb.position = xform.origin; - aabb.expand_to(xform.xform(Vector3(len, len, -r))); - aabb.expand_to(xform.xform(Vector3(-len, len, -r))); - aabb.expand_to(xform.xform(Vector3(-len, -len, -r))); - aabb.expand_to(xform.xform(Vector3(len, -len, -r))); - _add_item(aabb, ITEM_TYPE_SPOT_LIGHT, light_count); - } break; - } - - light_count++; - } - - _FORCE_INLINE_ void add_reflection_probe(const Transform &p_transform, const Vector3 &p_half_extents) { - if (unlikely(refprobe_count == refprobe_max)) { - refprobe_max = nearest_power_of_2_templated(refprobe_max + 1); - refprobes = (OrientedBoxData *)memrealloc(refprobes, sizeof(OrientedBoxData) * refprobe_max); - } - - Transform xform = view_xform * p_transform; - - OrientedBoxData &rp = refprobes[refprobe_count]; - Vector3 origin = xform.origin; - rp.position[0] = origin.x; - rp.position[1] = origin.y; - rp.position[2] = origin.z; - - Vector3 x_axis = xform.basis.get_axis(0) * p_half_extents.x; - rp.x_axis[0] = x_axis.x; - rp.x_axis[1] = x_axis.y; - rp.x_axis[2] = x_axis.z; - - Vector3 y_axis = xform.basis.get_axis(1) * p_half_extents.y; - rp.y_axis[0] = y_axis.x; - rp.y_axis[1] = y_axis.y; - rp.y_axis[2] = y_axis.z; - - Vector3 z_axis = xform.basis.get_axis(2) * p_half_extents.z; - rp.z_axis[0] = z_axis.x; - rp.z_axis[1] = z_axis.y; - rp.z_axis[2] = z_axis.z; - - AABB aabb; - - aabb.position = origin + x_axis + y_axis + z_axis; - aabb.expand_to(origin + x_axis + y_axis - z_axis); - aabb.expand_to(origin + x_axis - y_axis + z_axis); - aabb.expand_to(origin + x_axis - y_axis - z_axis); - aabb.expand_to(origin - x_axis + y_axis + z_axis); - aabb.expand_to(origin - x_axis + y_axis - z_axis); - aabb.expand_to(origin - x_axis - y_axis + z_axis); - aabb.expand_to(origin - x_axis - y_axis - z_axis); - - _add_item(aabb, ITEM_TYPE_REFLECTION_PROBE, refprobe_count); - - refprobe_count++; - } - - _FORCE_INLINE_ void add_decal(const Transform &p_transform, const Vector3 &p_half_extents) { - if (unlikely(decal_count == decal_max)) { - decal_max = nearest_power_of_2_templated(decal_max + 1); - decals = (OrientedBoxData *)memrealloc(decals, sizeof(OrientedBoxData) * decal_max); - } - - Transform xform = view_xform * p_transform; - - OrientedBoxData &dc = decals[decal_count]; - - Vector3 origin = xform.origin; - dc.position[0] = origin.x; - dc.position[1] = origin.y; - dc.position[2] = origin.z; - - Vector3 x_axis = xform.basis.get_axis(0) * p_half_extents.x; - dc.x_axis[0] = x_axis.x; - dc.x_axis[1] = x_axis.y; - dc.x_axis[2] = x_axis.z; - - Vector3 y_axis = xform.basis.get_axis(1) * p_half_extents.y; - dc.y_axis[0] = y_axis.x; - dc.y_axis[1] = y_axis.y; - dc.y_axis[2] = y_axis.z; - - Vector3 z_axis = xform.basis.get_axis(2) * p_half_extents.z; - dc.z_axis[0] = z_axis.x; - dc.z_axis[1] = z_axis.y; - dc.z_axis[2] = z_axis.z; - - AABB aabb; - - aabb.position = origin + x_axis + y_axis + z_axis; - aabb.expand_to(origin + x_axis + y_axis - z_axis); - aabb.expand_to(origin + x_axis - y_axis + z_axis); - aabb.expand_to(origin + x_axis - y_axis - z_axis); - aabb.expand_to(origin - x_axis + y_axis + z_axis); - aabb.expand_to(origin - x_axis + y_axis - z_axis); - aabb.expand_to(origin - x_axis - y_axis + z_axis); - aabb.expand_to(origin - x_axis - y_axis - z_axis); - - _add_item(aabb, ITEM_TYPE_DECAL, decal_count); - - decal_count++; - } - - void bake_cluster(); - - void setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth); - - RID get_cluster_texture() const; - RID get_cluster_indices_buffer() const; - - LightClusterBuilder(); - ~LightClusterBuilder(); -}; - -#endif // LIGHT_CLUSTER_BUILDER_H diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp index 508d56cfab..2a1a4efe48 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -1367,7 +1367,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p } if (light_count > 0) { - RD::get_singleton()->buffer_update(state.lights_uniform_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0], true); + RD::get_singleton()->buffer_update(state.lights_uniform_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0]); } { @@ -1421,7 +1421,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p //print_line("w: " + itos(ssize.width) + " s: " + rtos(canvas_scale)); state_buffer.tex_to_sdf = 1.0 / ((canvas_scale.x + canvas_scale.y) * 0.5); - RD::get_singleton()->buffer_update(state.canvas_state_buffer, 0, sizeof(State::Buffer), &state_buffer, true); + RD::get_singleton()->buffer_update(state.canvas_state_buffer, 0, sizeof(State::Buffer), &state_buffer); } { //default filter/repeat @@ -1622,7 +1622,7 @@ void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, projection.set_frustum(xmin, xmax, ymin, ymax, nearp, farp); } - Vector3 cam_target = Basis(Vector3(0, 0, Math_PI * 2 * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0)); + Vector3 cam_target = Basis(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0)); projection = projection * CameraMatrix(Transform().looking_at(cam_target, Vector3(0, 0, -1)).affine_inverse()); ShadowRenderPushConstant push_constant; @@ -2239,6 +2239,11 @@ Variant RendererCanvasRenderRD::ShaderData::get_default_parameter(const StringNa return Variant(); } +RS::ShaderNativeSourceCode RendererCanvasRenderRD::ShaderData::get_native_source_code() const { + RendererCanvasRenderRD *canvas_singleton = (RendererCanvasRenderRD *)RendererCanvasRender::singleton; + return canvas_singleton->shader.canvas_shader.version_get_native_source_code(version); +} + RendererCanvasRenderRD::ShaderData::ShaderData() { valid = false; uses_screen_texture = false; @@ -2489,7 +2494,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD(RendererStorageRD *p_storage) { actions.renames["COLOR"] = "color"; actions.renames["NORMAL"] = "normal"; actions.renames["NORMAL_MAP"] = "normal_map"; - actions.renames["NORMAL_MAP_DEPTH"] = "normal_depth"; + actions.renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; actions.renames["TEXTURE"] = "color_texture"; actions.renames["TEXTURE_PIXEL_SIZE"] = "draw_data.color_texture_pixel_size"; actions.renames["NORMAL_TEXTURE"] = "normal_texture"; @@ -2501,7 +2506,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD(RendererStorageRD *p_storage) { actions.renames["FRAGCOORD"] = "gl_FragCoord"; actions.renames["POINT_COORD"] = "gl_PointCoord"; - actions.renames["LIGHT_POSITION"] = "light_pos"; + actions.renames["LIGHT_POSITION"] = "light_position"; actions.renames["LIGHT_COLOR"] = "light_color"; actions.renames["LIGHT_ENERGY"] = "light_energy"; actions.renames["LIGHT"] = "light"; diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h index 545eeaa106..cb947d7180 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h @@ -188,6 +188,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender { virtual bool is_animated() const; virtual bool casts_shadows() const; virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + ShaderData(); virtual ~ShaderData(); }; diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp index fb9c114ade..be2552bd32 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -154,12 +154,9 @@ void RendererCompositorRD::initialize() { } } -ThreadWorkPool RendererCompositorRD::thread_work_pool; uint64_t RendererCompositorRD::frame = 1; void RendererCompositorRD::finalize() { - thread_work_pool.finish(); - memdelete(scene); memdelete(canvas); memdelete(storage); @@ -174,7 +171,6 @@ RendererCompositorRD *RendererCompositorRD::singleton = nullptr; RendererCompositorRD::RendererCompositorRD() { singleton = this; - thread_work_pool.init(); time = 0; storage = memnew(RendererStorageRD); diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index e1995872af..cb85fc79e0 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -90,8 +90,6 @@ public: virtual bool is_low_end() const { return false; } - static ThreadWorkPool thread_work_pool; - static RendererCompositorRD *singleton; RendererCompositorRD(); ~RendererCompositorRD() {} diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp index 6881d7913f..509495680a 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -394,6 +394,12 @@ Variant RendererSceneRenderForward::ShaderData::get_default_parameter(const Stri return Variant(); } +RS::ShaderNativeSourceCode RendererSceneRenderForward::ShaderData::get_native_source_code() const { + RendererSceneRenderForward *scene_singleton = (RendererSceneRenderForward *)RendererSceneRenderForward::singleton; + + return scene_singleton->shader.scene_shader.version_get_native_source_code(version); +} + RendererSceneRenderForward::ShaderData::ShaderData() { valid = false; uses_screen_texture = false; @@ -447,7 +453,7 @@ void RendererSceneRenderForward::MaterialData::update_parameters(const Map<Strin //check whether buffer changed if (p_uniform_dirty && ubo_data.size()) { update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); - RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); + RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw(), RD::BARRIER_MASK_RASTER); } uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); @@ -577,19 +583,6 @@ void RendererSceneRenderForward::RenderBufferDataForward::ensure_specular() { } } -void RendererSceneRenderForward::RenderBufferDataForward::ensure_gi() { - if (!reflection_buffer.is_valid()) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf.width = width; - tf.height = height; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - - reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } -} - void RendererSceneRenderForward::RenderBufferDataForward::ensure_giprobe() { if (!giprobe_buffer.is_valid()) { RD::TextureFormat tf; @@ -627,16 +620,6 @@ void RendererSceneRenderForward::RenderBufferDataForward::ensure_giprobe() { } void RendererSceneRenderForward::RenderBufferDataForward::clear() { - if (ambient_buffer != RID() && ambient_buffer != color) { - RD::get_singleton()->free(ambient_buffer); - ambient_buffer = RID(); - } - - if (reflection_buffer != RID() && reflection_buffer != specular) { - RD::get_singleton()->free(reflection_buffer); - reflection_buffer = RID(); - } - if (giprobe_buffer != RID()) { RD::get_singleton()->free(giprobe_buffer); giprobe_buffer = RID(); @@ -806,252 +789,92 @@ bool RendererSceneRenderForward::free(RID p_rid) { return false; } -void RendererSceneRenderForward::_fill_instances(RenderList::Element **p_elements, int p_element_count, bool p_for_depth, bool p_has_sdfgi, bool p_has_opaque_gi) { - uint32_t lightmap_captures_used = 0; - - for (int i = 0; i < p_element_count; i++) { - const RenderList::Element *e = p_elements[i]; - InstanceData &id = scene_state.instances[i]; - bool store_transform = true; - id.flags = 0; - id.mask = e->instance->layer_mask; - id.instance_uniforms_ofs = e->instance->instance_allocated_shader_parameters_offset >= 0 ? e->instance->instance_allocated_shader_parameters_offset : 0; - - if (e->instance->base_type == RS::INSTANCE_MULTIMESH) { - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH; - uint32_t stride; - if (storage->multimesh_get_transform_format(e->instance->base) == RS::MULTIMESH_TRANSFORM_2D) { - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; - stride = 2; - } else { - stride = 3; - } - if (storage->multimesh_uses_colors(e->instance->base)) { - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; - stride += 1; - } - if (storage->multimesh_uses_custom_data(e->instance->base)) { - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; - stride += 1; - } - - id.flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); - } else if (e->instance->base_type == RS::INSTANCE_PARTICLES) { - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH; - uint32_t stride; - if (false) { // 2D particles - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; - stride = 2; - } else { - stride = 3; - } - - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; - stride += 1; - - id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; - stride += 1; - - id.flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); - - if (!storage->particles_is_using_local_coords(e->instance->base)) { - store_transform = false; - } - - } else if (e->instance->base_type == RS::INSTANCE_MESH) { - if (e->instance->skeleton.is_valid()) { - id.flags |= INSTANCE_DATA_FLAG_SKELETON; - } - } - - if (store_transform) { - RendererStorageRD::store_transform(e->instance->transform, id.transform); - RendererStorageRD::store_transform(Transform(e->instance->transform.basis.inverse().transposed()), id.normal_transform); - } else { - RendererStorageRD::store_transform(Transform(), id.transform); - RendererStorageRD::store_transform(Transform(), id.normal_transform); - } - - if (p_for_depth) { - id.gi_offset = 0xFFFFFFFF; - continue; - } - - if (e->instance->lightmap) { - int32_t lightmap_index = storage->lightmap_get_array_index(e->instance->lightmap->base); - if (lightmap_index >= 0) { - id.gi_offset = lightmap_index; - id.gi_offset |= e->instance->lightmap_slice_index << 12; - id.gi_offset |= e->instance->lightmap_cull_index << 20; - id.lightmap_uv_scale[0] = e->instance->lightmap_uv_scale.position.x; - id.lightmap_uv_scale[1] = e->instance->lightmap_uv_scale.position.y; - id.lightmap_uv_scale[2] = e->instance->lightmap_uv_scale.size.width; - id.lightmap_uv_scale[3] = e->instance->lightmap_uv_scale.size.height; - id.flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP; - if (storage->lightmap_uses_spherical_harmonics(e->instance->lightmap->base)) { - id.flags |= INSTANCE_DATA_FLAG_USE_SH_LIGHTMAP; - } - } else { - id.gi_offset = 0xFFFFFFFF; - } - } else if (!e->instance->lightmap_sh.is_empty()) { - if (lightmap_captures_used < scene_state.max_lightmap_captures) { - const Color *src_capture = e->instance->lightmap_sh.ptr(); - LightmapCaptureData &lcd = scene_state.lightmap_captures[lightmap_captures_used]; - for (int j = 0; j < 9; j++) { - lcd.sh[j * 4 + 0] = src_capture[j].r; - lcd.sh[j * 4 + 1] = src_capture[j].g; - lcd.sh[j * 4 + 2] = src_capture[j].b; - lcd.sh[j * 4 + 3] = src_capture[j].a; - } - id.flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE; - id.gi_offset = lightmap_captures_used; - lightmap_captures_used++; - } - - } else { - if (p_has_opaque_gi) { - id.flags |= INSTANCE_DATA_FLAG_USE_GI_BUFFERS; - } - - if (!low_end && !e->instance->gi_probe_instances.is_empty()) { - uint32_t written = 0; - for (int j = 0; j < e->instance->gi_probe_instances.size(); j++) { - RID probe = e->instance->gi_probe_instances[j]; - - uint32_t index = gi_probe_instance_get_render_index(probe); - - if (written == 0) { - id.gi_offset = index; - id.flags |= INSTANCE_DATA_FLAG_USE_GIPROBE; - written = 1; - } else { - id.gi_offset = index << 16; - written = 2; - break; - } - } - if (written == 0) { - id.gi_offset = 0xFFFFFFFF; - } else if (written == 1) { - id.gi_offset |= 0xFFFF0000; - } - } else { - if (p_has_sdfgi && (e->instance->baked_light || e->instance->dynamic_gi)) { - id.flags |= INSTANCE_DATA_FLAG_USE_SDFGI; - } - id.gi_offset = 0xFFFFFFFF; - } - } - } - - RD::get_singleton()->buffer_update(scene_state.instance_buffer, 0, sizeof(InstanceData) * p_element_count, scene_state.instances, true); - if (lightmap_captures_used) { - RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures, true); - } -} - /// RENDERING /// -void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderList::Element **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe, const Vector2 &p_uv_offset, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { +template <RendererSceneRenderForward::PassMode p_pass_mode> +void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) { RD::DrawListID draw_list = p_draw_list; RD::FramebufferFormatID framebuffer_format = p_framebuffer_Format; //global scope bindings RD::get_singleton()->draw_list_bind_uniform_set(draw_list, render_base_uniform_set, SCENE_UNIFORM_SET); - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_render_pass_uniform_set, RENDER_PASS_UNIFORM_SET); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_params->render_pass_uniform_set, RENDER_PASS_UNIFORM_SET); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, default_vec4_xform_uniform_set, TRANSFORMS_UNIFORM_SET); - MaterialData *prev_material = nullptr; + RID prev_material_uniform_set; RID prev_vertex_array_rd; RID prev_index_array_rd; RID prev_pipeline_rd; RID prev_xforms_uniform_set; - PushConstant push_constant; - zeromem(&push_constant, sizeof(PushConstant)); - push_constant.bake_uv2_offset[0] = p_uv_offset.x; - push_constant.bake_uv2_offset[1] = p_uv_offset.y; + bool shadow_pass = (p_params->pass_mode == PASS_MODE_SHADOW) || (p_params->pass_mode == PASS_MODE_SHADOW_DP); + + SceneState::PushConstant push_constant; + + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { + push_constant.uv_offset = Math::make_half_float(p_params->uv_offset.y) << 16; + push_constant.uv_offset |= Math::make_half_float(p_params->uv_offset.x); + } else { + push_constant.uv_offset = 0; + } + + for (uint32_t i = p_from_element; i < p_to_element; i++) { + const GeometryInstanceSurfaceDataCache *surf = p_params->elements[i]; + const RenderElementInfo &element_info = p_params->element_info[i]; + + push_constant.base_index = i + p_params->element_offset; + + RID material_uniform_set; + ShaderData *shader; + void *mesh_surface; + + if (shadow_pass || p_params->pass_mode == PASS_MODE_DEPTH) { //regular depth pass can use these too + material_uniform_set = surf->material_uniform_set_shadow; + shader = surf->shader_shadow; + mesh_surface = surf->surface_shadow; - for (int i = 0; i < p_element_count; i++) { - const RenderList::Element *e = p_elements[i]; + } else { + material_uniform_set = surf->material_uniform_set; + shader = surf->shader; + mesh_surface = surf->surface; + } - MaterialData *material = e->material; - ShaderData *shader = material->shader_data; - RID xforms_uniform_set; + if (!mesh_surface) { + continue; + } //find cull variant ShaderData::CullVariant cull_variant; - if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || p_pass_mode == PASS_MODE_SDF || ((p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) && e->instance->cast_shadows == RS::SHADOW_CASTING_SETTING_DOUBLE_SIDED)) { + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL || p_params->pass_mode == PASS_MODE_SDF || ((p_params->pass_mode == PASS_MODE_SHADOW || p_params->pass_mode == PASS_MODE_SHADOW_DP) && surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS)) { cull_variant = ShaderData::CULL_VARIANT_DOUBLE_SIDED; } else { - bool mirror = e->instance->mirror; - if (p_reverse_cull) { + bool mirror = surf->owner->mirror; + if (p_params->reverse_cull) { mirror = !mirror; } cull_variant = mirror ? ShaderData::CULL_VARIANT_REVERSED : ShaderData::CULL_VARIANT_NORMAL; } - //find primitive and vertex format - RS::PrimitiveType primitive; - void *mesh_surface = nullptr; - - switch (e->instance->base_type) { - case RS::INSTANCE_MESH: { - mesh_surface = storage->mesh_get_surface(e->instance->base, e->surface_index); - - primitive = storage->mesh_surface_get_primitive(mesh_surface); - if (e->instance->skeleton.is_valid()) { - xforms_uniform_set = storage->skeleton_get_3d_uniform_set(e->instance->skeleton, default_shader_rd, TRANSFORMS_UNIFORM_SET); - } - } break; - case RS::INSTANCE_MULTIMESH: { - RID mesh = storage->multimesh_get_mesh(e->instance->base); - ERR_CONTINUE(!mesh.is_valid()); //should be a bug - - mesh_surface = storage->mesh_get_surface(e->instance->base, e->surface_index); - - primitive = storage->mesh_surface_get_primitive(mesh_surface); - - xforms_uniform_set = storage->multimesh_get_3d_uniform_set(e->instance->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); - - } break; - case RS::INSTANCE_IMMEDIATE: { - ERR_CONTINUE(true); //should be a bug - } break; - case RS::INSTANCE_PARTICLES: { - RID mesh = storage->particles_get_draw_pass_mesh(e->instance->base, e->surface_index >> 16); - ERR_CONTINUE(!mesh.is_valid()); //should be a bug - - mesh_surface = storage->mesh_get_surface(e->instance->base, e->surface_index & 0xFFFF); - - primitive = storage->mesh_surface_get_primitive(mesh_surface); - - xforms_uniform_set = storage->particles_get_instance_buffer_uniform_set(e->instance->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); - - } break; - default: { - ERR_CONTINUE(true); //should be a bug - } - } + RS::PrimitiveType primitive = surf->primitive; + RID xforms_uniform_set = surf->owner->transforms_uniform_set; ShaderVersion shader_version = SHADER_VERSION_MAX; // Assigned to silence wrong -Wmaybe-initialized. - switch (p_pass_mode) { + switch (p_params->pass_mode) { case PASS_MODE_COLOR: case PASS_MODE_COLOR_TRANSPARENT: { - if (e->uses_lightmap) { + if (element_info.uses_lightmap) { shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS; - } else if (e->uses_forward_gi) { + } else if (element_info.uses_forward_gi) { shader_version = SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI; } else { shader_version = SHADER_VERSION_COLOR_PASS; } } break; case PASS_MODE_COLOR_SPECULAR: { - if (e->uses_lightmap) { + if (element_info.uses_lightmap) { shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR; } else { shader_version = SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR; @@ -1086,42 +909,15 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw RID vertex_array_rd; RID index_array_rd; - if (mesh_surface) { - if (e->instance->mesh_instance.is_valid()) { //skeleton and blend shape - storage->mesh_instance_surface_get_vertex_arrays_and_format(e->instance->mesh_instance, e->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); - } else { - storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); - } - - if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) { - Vector3 support_min = e->instance->transformed_aabb.get_support(-p_lod_plane.normal); - Vector3 support_max = e->instance->transformed_aabb.get_support(p_lod_plane.normal); - - float distance_min = p_lod_plane.distance_to(support_min); - float distance_max = p_lod_plane.distance_to(support_max); - - float distance = 0.0; - - if (distance_min * distance_max < 0.0) { - //crossing plane - distance = 0.0; - } else if (distance_min >= 0.0) { - distance = distance_min; - } else if (distance_max <= 0.0) { - distance = -distance_max; - } - - Vector3 model_scale_vec = e->instance->transform.basis.get_scale_abs(); - - float model_scale = MAX(model_scale_vec.x, MAX(model_scale_vec.y, model_scale_vec.z)); - - index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, model_scale * e->instance->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold); - - } else { - index_array_rd = storage->mesh_surface_get_index_array(mesh_surface); - } + //skeleton and blend shape + if (surf->owner->mesh_instance.is_valid()) { + storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); + } else { + storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); } + index_array_rd = storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); + if (prev_vertex_array_rd != vertex_array_rd) { RD::get_singleton()->draw_list_bind_vertex_array(draw_list, vertex_array_rd); prev_vertex_array_rd = vertex_array_rd; @@ -1134,7 +930,7 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw prev_index_array_rd = index_array_rd; } - RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_force_wireframe); + RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_params->force_wireframe); if (pipeline_rd != prev_pipeline_rd) { // checking with prev shader does not make so much sense, as @@ -1148,40 +944,87 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw prev_xforms_uniform_set = xforms_uniform_set; } - if (material != prev_material) { + if (material_uniform_set != prev_material_uniform_set) { //update uniform set - if (material->uniform_set.is_valid()) { - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, material->uniform_set, MATERIAL_UNIFORM_SET); + if (material_uniform_set.is_valid()) { + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, material_uniform_set, MATERIAL_UNIFORM_SET); } - prev_material = material; + prev_material_uniform_set = material_uniform_set; } - push_constant.index = i; - RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant)); - switch (e->instance->base_type) { - case RS::INSTANCE_MESH: { - RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid()); - } break; - case RS::INSTANCE_MULTIMESH: { - uint32_t instances = storage->multimesh_get_instances_to_draw(e->instance->base); - RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instances); - } break; - case RS::INSTANCE_IMMEDIATE: { - } break; - case RS::INSTANCE_PARTICLES: { - uint32_t instances = storage->particles_get_amount(e->instance->base); - RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instances); - } break; - default: { - ERR_CONTINUE(true); //should be a bug - } - } + uint32_t instance_count = surf->owner->instance_count > 1 ? surf->owner->instance_count : element_info.repeat; + RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instance_count); + i += element_info.repeat - 1; //skip equal elements } } -void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) { +void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) { + //use template for faster performance (pass mode comparisons are inlined) + + switch (p_params->pass_mode) { + case PASS_MODE_COLOR: { + _render_list_template<PASS_MODE_COLOR>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_COLOR_SPECULAR: { + _render_list_template<PASS_MODE_COLOR_SPECULAR>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_COLOR_TRANSPARENT: { + _render_list_template<PASS_MODE_COLOR_TRANSPARENT>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_SHADOW: { + _render_list_template<PASS_MODE_SHADOW>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_SHADOW_DP: { + _render_list_template<PASS_MODE_SHADOW_DP>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH: { + _render_list_template<PASS_MODE_DEPTH>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { + _render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: { + _render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH_MATERIAL: { + _render_list_template<PASS_MODE_DEPTH_MATERIAL>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_SDF: { + _render_list_template<PASS_MODE_SDF>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + } +} + +void RendererSceneRenderForward::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) { + uint32_t render_total = p_params->element_count; + uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count(); + uint32_t render_from = p_thread * render_total / total_threads; + uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads); + _render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to); +} + +void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) { + RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(p_framebuffer); + p_params->framebuffer_format = fb_format; + + if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time + //multi threaded + thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); + RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); + RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RendererSceneRenderForward::_render_list_thread_function, p_params); + RD::get_singleton()->draw_list_end(p_params->barrier); + } else { + //single threaded + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); + _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); + RD::get_singleton()->draw_list_end(p_params->barrier); + } +} + +void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows, int p_index) { //CameraMatrix projection = p_cam_projection; //projection.flip_y(); // Vulkan and modern APIs use Y-Down CameraMatrix correction; @@ -1209,8 +1052,18 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren scene_state.ubo.penumbra_shadow_samples = penumbra_shadow_samples_get(); scene_state.ubo.soft_shadow_samples = soft_shadow_samples_get(); - scene_state.ubo.screen_pixel_size[0] = p_screen_pixel_size.x; - scene_state.ubo.screen_pixel_size[1] = p_screen_pixel_size.y; + Size2 screen_pixel_size = Vector2(1.0, 1.0) / Size2(p_screen_size); + scene_state.ubo.screen_pixel_size[0] = screen_pixel_size.x; + scene_state.ubo.screen_pixel_size[1] = screen_pixel_size.y; + + scene_state.ubo.cluster_shift = get_shift_from_power_of_2(p_cluster_size); + scene_state.ubo.max_cluster_element_count_div_32 = p_max_cluster_elements / 32; + { + uint32_t cluster_screen_width = (p_screen_size.width - 1) / p_cluster_size + 1; + uint32_t cluster_screen_height = (p_screen_size.height - 1) / p_cluster_size + 1; + scene_state.ubo.cluster_type_size = cluster_screen_width * cluster_screen_height * (scene_state.ubo.max_cluster_element_count_div_32 + 32); + scene_state.ubo.cluster_width = cluster_screen_width; + } if (p_shadow_atlas.is_valid()) { Vector2 sas = shadow_atlas_get_size(p_shadow_atlas); @@ -1410,276 +1263,332 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren scene_state.ubo.roughness_limiter_amount = screen_space_roughness_limiter_get_amount(); scene_state.ubo.roughness_limiter_limit = screen_space_roughness_limiter_get_limit(); - RD::get_singleton()->buffer_update(scene_state.uniform_buffer, 0, sizeof(SceneState::UBO), &scene_state.ubo, true); -} - -void RendererSceneRenderForward::_add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi) { - RID m_src; - - m_src = p_instance->material_override.is_valid() ? p_instance->material_override : p_material; - - if (unlikely(get_debug_draw_mode() != RS::VIEWPORT_DEBUG_DRAW_DISABLED)) { - if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_OVERDRAW) { - m_src = overdraw_material; - } else if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_LIGHTING) { - m_src = default_material; - } - } - - MaterialData *material = nullptr; - - if (m_src.is_valid()) { - material = (MaterialData *)storage->material_get_data(m_src, RendererStorageRD::SHADER_TYPE_3D); - if (!material || !material->shader_data->valid) { - material = nullptr; + if (p_index >= (int)scene_state.uniform_buffers.size()) { + uint32_t from = scene_state.uniform_buffers.size(); + scene_state.uniform_buffers.resize(p_index + 1); + render_pass_uniform_sets.resize(p_index + 1); + for (uint32_t i = from; i < scene_state.uniform_buffers.size(); i++) { + scene_state.uniform_buffers[i] = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); } } + RD::get_singleton()->buffer_update(scene_state.uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo, RD::BARRIER_MASK_RASTER); +} - if (!material) { - material = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); - m_src = default_material; - } - - ERR_FAIL_COND(!material); - - _add_geometry_with_material(p_instance, p_surface, material, m_src, p_pass_mode, p_geometry_index, p_using_sdfgi); - - while (material->next_pass.is_valid()) { - material = (MaterialData *)storage->material_get_data(material->next_pass, RendererStorageRD::SHADER_TYPE_3D); - if (!material || !material->shader_data->valid) { - break; +void RendererSceneRenderForward::_update_instance_data_buffer(RenderListType p_render_list) { + if (scene_state.instance_data[p_render_list].size() > 0) { + if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) { + if (scene_state.instance_buffer[p_render_list] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[p_render_list]); + } + uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size())); + scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData)); + scene_state.instance_buffer_size[p_render_list] = new_size; } - _add_geometry_with_material(p_instance, p_surface, material, material->next_pass, p_pass_mode, p_geometry_index, p_using_sdfgi); + RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr(), RD::BARRIER_MASK_RASTER); } } +void RendererSceneRenderForward::_fill_instance_data(RenderListType p_render_list, uint32_t p_offset, int32_t p_max_elements, bool p_update_buffer) { + RenderList *rl = &render_list[p_render_list]; + uint32_t element_total = p_max_elements >= 0 ? uint32_t(p_max_elements) : rl->elements.size(); -void RendererSceneRenderForward::_add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi) { - bool has_read_screen_alpha = p_material->shader_data->uses_screen_texture || p_material->shader_data->uses_depth_texture || p_material->shader_data->uses_normal_texture; - bool has_base_alpha = (p_material->shader_data->uses_alpha || has_read_screen_alpha); - bool has_blend_alpha = p_material->shader_data->uses_blend_alpha; - bool has_alpha = has_base_alpha || has_blend_alpha; + scene_state.instance_data[p_render_list].resize(p_offset + element_total); + rl->element_info.resize(p_offset + element_total); - if (p_material->shader_data->uses_sss) { - scene_state.used_sss = true; - } + uint32_t repeats = 0; + GeometryInstanceSurfaceDataCache *prev_surface = nullptr; + for (uint32_t i = 0; i < element_total; i++) { + GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset]; + GeometryInstanceForward *inst = surface->owner; - if (p_material->shader_data->uses_screen_texture) { - scene_state.used_screen_texture = true; - } + SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset]; - if (p_material->shader_data->uses_depth_texture) { - scene_state.used_depth_texture = true; - } + if (inst->store_transform_cache) { + RendererStorageRD::store_transform(inst->transform, instance_data.transform); + } else { + RendererStorageRD::store_transform(Transform(), instance_data.transform); + } - if (p_material->shader_data->uses_normal_texture) { - scene_state.used_normal_texture = true; - } + instance_data.flags = inst->flags_cache; + instance_data.gi_offset = inst->gi_offset_cache; + instance_data.layer_mask = inst->layer_mask; + instance_data.instance_uniforms_ofs = uint32_t(inst->shader_parameters_offset); + instance_data.lightmap_uv_scale[0] = inst->lightmap_uv_scale.position.x; + instance_data.lightmap_uv_scale[1] = inst->lightmap_uv_scale.position.y; + instance_data.lightmap_uv_scale[2] = inst->lightmap_uv_scale.size.x; + instance_data.lightmap_uv_scale[3] = inst->lightmap_uv_scale.size.y; - if (p_pass_mode != PASS_MODE_COLOR && p_pass_mode != PASS_MODE_COLOR_SPECULAR) { - if (has_blend_alpha || has_read_screen_alpha || (has_base_alpha && !p_material->shader_data->uses_depth_pre_pass) || p_material->shader_data->depth_draw == ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED || p_instance->cast_shadows == RS::SHADOW_CASTING_SETTING_OFF) { - //conditions in which no depth pass should be processed - return; - } + bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid(); - if ((p_pass_mode != PASS_MODE_DEPTH_MATERIAL && p_pass_mode != PASS_MODE_SDF) && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_pre_pass) { - //shader does not use discard and does not write a vertex position, use generic material - if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_DEPTH) { - p_material = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); - } else if ((p_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || p_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE) && !p_material->shader_data->uses_normal && !p_material->shader_data->uses_roughness) { - p_material = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); + if (prev_surface != nullptr && !cant_repeat && prev_surface->sort.sort_key1 == surface->sort.sort_key1 && prev_surface->sort.sort_key2 == surface->sort.sort_key2) { + //this element is the same as the previous one, count repeats to draw it using instancing + repeats++; + } else { + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + i - j].repeat = j; + } } + repeats = 1; } - has_alpha = false; - } - - has_alpha = has_alpha || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED; + RenderElementInfo &element_info = rl->element_info[p_offset + i]; - RenderList::Element *e = has_alpha ? render_list.add_alpha_element() : render_list.add_element(); + element_info.lod_index = surface->sort.lod_index; + element_info.uses_forward_gi = surface->sort.uses_forward_gi; + element_info.uses_lightmap = surface->sort.uses_lightmap; - if (!e) { - return; + if (cant_repeat) { + prev_surface = nullptr; + } else { + prev_surface = surface; + } } - e->instance = p_instance; - e->material = p_material; - e->surface_index = p_surface; - e->sort_key = 0; - - if (e->material->last_pass != render_pass) { - if (!RD::get_singleton()->uniform_set_is_valid(e->material->uniform_set)) { - //uniform set no longer valid, probably a texture changed - storage->material_force_update_textures(p_material_rid, RendererStorageRD::SHADER_TYPE_3D); - } - e->material->last_pass = render_pass; - e->material->index = scene_state.current_material_index++; - if (e->material->shader_data->last_pass != render_pass) { - e->material->shader_data->last_pass = scene_state.current_material_index++; - e->material->shader_data->index = scene_state.current_shader_index++; + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + element_total - j].repeat = j; } } - e->geometry_index = p_geometry_index; - e->material_index = e->material->index; - e->uses_instancing = e->instance->base_type == RS::INSTANCE_MULTIMESH; - e->uses_lightmap = e->instance->lightmap != nullptr || !e->instance->lightmap_sh.is_empty(); - e->uses_forward_gi = has_alpha && (e->instance->gi_probe_instances.size() || p_using_sdfgi); - e->shader_index = e->shader_index; - e->depth_layer = e->instance->depth_layer; - e->priority = p_material->priority; - if (p_material->shader_data->uses_time) { - RenderingServerDefault::redraw_request(); + if (p_update_buffer) { + _update_instance_data_buffer(p_render_list); } } -void RendererSceneRenderForward::_fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi) { - scene_state.current_shader_index = 0; - scene_state.current_material_index = 0; - scene_state.used_sss = false; - scene_state.used_screen_texture = false; - scene_state.used_normal_texture = false; - scene_state.used_depth_texture = false; +void RendererSceneRenderForward::_fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi, bool p_using_opaque_gi, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_append) { + if (p_render_list == RENDER_LIST_OPAQUE) { + scene_state.used_sss = false; + scene_state.used_screen_texture = false; + scene_state.used_normal_texture = false; + scene_state.used_depth_texture = false; + } + uint32_t lightmap_captures_used = 0; Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); near_plane.d += p_cam_projection.get_z_near(); float z_max = p_cam_projection.get_z_far() - p_cam_projection.get_z_near(); - uint32_t geometry_index = 0; + RenderList *rl = &render_list[p_render_list]; + _update_dirty_geometry_instances(); + + if (!p_append) { + rl->clear(); + if (p_render_list == RENDER_LIST_OPAQUE) { + render_list[RENDER_LIST_ALPHA].clear(); //opaque fills alpha too + } + } //fill list for (int i = 0; i < (int)p_instances.size(); i++) { - InstanceBase *inst = p_instances[i]; + GeometryInstanceForward *inst = static_cast<GeometryInstanceForward *>(p_instances[i]); - inst->depth = near_plane.distance_to(inst->transform.origin); - inst->depth_layer = CLAMP(int(inst->depth * 16 / z_max), 0, 15); + Vector3 support_min = inst->transformed_aabb.get_support(-near_plane.normal); + inst->depth = near_plane.distance_to(support_min); + uint32_t depth_layer = CLAMP(int(inst->depth * 16 / z_max), 0, 15); - //add geometry for drawing - switch (inst->base_type) { - case RS::INSTANCE_MESH: { - const RID *materials = nullptr; - uint32_t surface_count; + uint32_t flags = inst->base_flags; //fill flags if appropriate - materials = storage->mesh_get_surface_count_and_materials(inst->base, surface_count); - if (!materials) { - continue; //nothing to do - } + bool uses_lightmap = false; + bool uses_gi = false; - const RID *inst_materials = inst->materials.ptr(); + if (p_render_list == RENDER_LIST_OPAQUE) { + //setup GI - for (uint32_t j = 0; j < surface_count; j++) { - RID material = inst_materials[j].is_valid() ? inst_materials[j] : materials[j]; + if (inst->lightmap_instance.is_valid()) { + int32_t lightmap_cull_index = -1; + for (uint32_t j = 0; j < scene_state.lightmaps_used; j++) { + if (scene_state.lightmap_ids[j] == inst->lightmap_instance) { + lightmap_cull_index = j; + break; + } + } + if (lightmap_cull_index >= 0) { + inst->gi_offset_cache = inst->lightmap_slice_index << 16; + inst->gi_offset_cache |= lightmap_cull_index; + flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP; + if (scene_state.lightmap_has_sh[lightmap_cull_index]) { + flags |= INSTANCE_DATA_FLAG_USE_SH_LIGHTMAP; + } + uses_lightmap = true; + } else { + inst->gi_offset_cache = 0xFFFFFFFF; + } - uint32_t surface_index = storage->mesh_surface_get_render_pass_index(inst->base, j, render_pass, &geometry_index); - _add_geometry(inst, j, material, p_pass_mode, surface_index, p_using_sdfgi); + } else if (inst->lightmap_sh) { + if (lightmap_captures_used < scene_state.max_lightmap_captures) { + const Color *src_capture = inst->lightmap_sh->sh; + LightmapCaptureData &lcd = scene_state.lightmap_captures[lightmap_captures_used]; + for (int j = 0; j < 9; j++) { + lcd.sh[j * 4 + 0] = src_capture[j].r; + lcd.sh[j * 4 + 1] = src_capture[j].g; + lcd.sh[j * 4 + 2] = src_capture[j].b; + lcd.sh[j * 4 + 3] = src_capture[j].a; + } + flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE; + inst->gi_offset_cache = lightmap_captures_used; + lightmap_captures_used++; + uses_lightmap = true; } - //mesh->last_pass=frame; + } else if (!low_end) { + if (p_using_opaque_gi) { + flags |= INSTANCE_DATA_FLAG_USE_GI_BUFFERS; + } - } break; + if (inst->gi_probes[0].is_valid()) { + uint32_t probe0_index = 0xFFFF; + uint32_t probe1_index = 0xFFFF; - case RS::INSTANCE_MULTIMESH: { - if (storage->multimesh_get_instances_to_draw(inst->base) == 0) { - //not visible, 0 instances - continue; - } + for (uint32_t j = 0; j < scene_state.giprobes_used; j++) { + if (scene_state.giprobe_ids[j] == inst->gi_probes[0]) { + probe0_index = j; + } else if (scene_state.giprobe_ids[j] == inst->gi_probes[1]) { + probe1_index = j; + } + } - RID mesh = storage->multimesh_get_mesh(inst->base); - if (!mesh.is_valid()) { - continue; + if (probe0_index == 0xFFFF && probe1_index != 0xFFFF) { + //0 must always exist if a probe exists + SWAP(probe0_index, probe1_index); + } + + inst->gi_offset_cache = probe0_index | (probe1_index << 16); + flags |= INSTANCE_DATA_FLAG_USE_GIPROBE; + uses_gi = true; + } else { + if (p_using_sdfgi && inst->can_sdfgi) { + flags |= INSTANCE_DATA_FLAG_USE_SDFGI; + uses_gi = true; + } + inst->gi_offset_cache = 0xFFFFFFFF; } + } + } + inst->flags_cache = flags; - const RID *materials = nullptr; - uint32_t surface_count; + GeometryInstanceSurfaceDataCache *surf = inst->surface_caches; - materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); - if (!materials) { - continue; //nothing to do - } + while (surf) { + surf->sort.uses_forward_gi = 0; + surf->sort.uses_lightmap = 0; - for (uint32_t j = 0; j < surface_count; j++) { - uint32_t surface_index = storage->mesh_surface_get_multimesh_render_pass_index(mesh, j, render_pass, &geometry_index); - _add_geometry(inst, j, materials[j], p_pass_mode, surface_index, p_using_sdfgi); - } + // LOD - } break; -#if 0 - case RS::INSTANCE_IMMEDIATE: { - RasterizerStorageGLES3::Immediate *immediate = storage->immediate_owner.getornull(inst->base); - ERR_CONTINUE(!immediate); + if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(surf->surface)) { + //lod + Vector3 lod_support_min = inst->transformed_aabb.get_support(-p_lod_plane.normal); + Vector3 lod_support_max = inst->transformed_aabb.get_support(p_lod_plane.normal); - _add_geometry(immediate, inst, nullptr, -1, p_depth_pass, p_shadow_pass); + float distance_min = p_lod_plane.distance_to(lod_support_min); + float distance_max = p_lod_plane.distance_to(lod_support_max); - } break; -#endif - case RS::INSTANCE_PARTICLES: { - int draw_passes = storage->particles_get_draw_passes(inst->base); + float distance = 0.0; - for (int j = 0; j < draw_passes; j++) { - RID mesh = storage->particles_get_draw_pass_mesh(inst->base, j); - if (!mesh.is_valid()) - continue; + if (distance_min * distance_max < 0.0) { + //crossing plane + distance = 0.0; + } else if (distance_min >= 0.0) { + distance = distance_min; + } else if (distance_max <= 0.0) { + distance = -distance_max; + } - const RID *materials = nullptr; - uint32_t surface_count; + surf->sort.lod_index = storage->mesh_surface_get_lod(surf->surface, inst->lod_model_scale * inst->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold); + } else { + surf->sort.lod_index = 0; + } - materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); - if (!materials) { - continue; //nothing to do + // ADD Element + if (p_pass_mode == PASS_MODE_COLOR) { + if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { + rl->add_element(surf); + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA) { + render_list[RENDER_LIST_ALPHA].add_element(surf); + if (uses_gi) { + surf->sort.uses_forward_gi = 1; } + } - for (uint32_t k = 0; k < surface_count; k++) { - uint32_t surface_index = storage->mesh_surface_get_particles_render_pass_index(mesh, j, render_pass, &geometry_index); - _add_geometry(inst, (j << 16) | k, materials[j], p_pass_mode, surface_index, p_using_sdfgi); - } + if (uses_lightmap) { + surf->sort.uses_lightmap = 1; } - } break; + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_SUBSURFACE_SCATTERING) { + scene_state.used_sss = true; + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_SCREEN_TEXTURE) { + scene_state.used_screen_texture = true; + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_NORMAL_TEXTURE) { + scene_state.used_normal_texture = true; + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DEPTH_TEXTURE) { + scene_state.used_depth_texture = true; + } - default: { + } else if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) { + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW) { + rl->add_element(surf); + } + } else { + if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { + rl->add_element(surf); + } } + + surf->sort.depth_layer = depth_layer; + + surf = surf->next; } } + + if (p_render_list == RENDER_LIST_OPAQUE && lightmap_captures_used) { + RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures, RD::BARRIER_MASK_RASTER); + } +} + +void RendererSceneRenderForward::_setup_giprobes(const PagedArray<RID> &p_giprobes) { + scene_state.giprobes_used = MIN(p_giprobes.size(), uint32_t(MAX_GI_PROBES)); + for (uint32_t i = 0; i < scene_state.giprobes_used; i++) { + scene_state.giprobe_ids[i] = p_giprobes[i]; + } } -void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<InstanceBase *> &p_lightmaps, const Transform &p_cam_transform) { - uint32_t lightmaps_used = 0; +void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform) { + scene_state.lightmaps_used = 0; for (int i = 0; i < (int)p_lightmaps.size(); i++) { if (i >= (int)scene_state.max_lightmaps) { break; } - InstanceBase *lm = p_lightmaps[i]; - Basis to_lm = lm->transform.basis.inverse() * p_cam_transform.basis; + RID lightmap = lightmap_instance_get_lightmap(p_lightmaps[i]); + + Basis to_lm = lightmap_instance_get_transform(p_lightmaps[i]).basis.inverse() * p_cam_transform.basis; to_lm = to_lm.inverse().transposed(); //will transform normals RendererStorageRD::store_transform_3x3(to_lm, scene_state.lightmaps[i].normal_xform); - lm->lightmap_cull_index = i; - lightmaps_used++; + scene_state.lightmap_ids[i] = p_lightmaps[i]; + scene_state.lightmap_has_sh[i] = storage->lightmap_uses_spherical_harmonics(lightmap); + + scene_state.lightmaps_used++; } - if (lightmaps_used > 0) { - RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * lightmaps_used, scene_state.lightmaps, true); + if (scene_state.lightmaps_used > 0) { + RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps, RD::BARRIER_MASK_RASTER); } } -void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { +void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { RenderBufferDataForward *render_buffer = nullptr; if (p_render_buffer.is_valid()) { render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer); } //first of all, make a new render pass - render_pass++; - //fill up ubo RENDER_TIMESTAMP("Setup 3D Scene"); - if (p_reflection_probe.is_valid()) { - scene_state.ubo.reflection_multiplier = 0.0; - } else { - scene_state.ubo.reflection_multiplier = 1.0; - } - float lod_distance_multiplier = p_cam_projection.get_lod_multiplier(); Plane lod_camera_plane(p_cam_transform.get_origin(), -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); @@ -1692,9 +1601,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf Vector2 vp_he = p_cam_projection.get_viewport_half_extents(); scene_state.ubo.viewport_size[0] = vp_he.x; scene_state.ubo.viewport_size[1] = vp_he.y; - scene_state.ubo.directional_light_count = p_directional_light_count; + scene_state.ubo.directional_light_count = 0; - Size2 screen_pixel_size; Size2i screen_size; RID opaque_framebuffer; RID opaque_specular_framebuffer; @@ -1709,8 +1617,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf bool using_giprobe = false; if (render_buffer) { - screen_pixel_size.width = 1.0 / render_buffer->width; - screen_pixel_size.height = 1.0 / render_buffer->height; screen_size.x = render_buffer->width; screen_size.y = render_buffer->height; @@ -1718,7 +1624,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf if (!low_end && p_gi_probes.size() > 0) { using_giprobe = true; - render_buffer->ensure_gi(); } if (!p_environment.is_valid() && using_giprobe) { @@ -1728,7 +1633,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf if (environment_is_sdfgi_enabled(p_environment)) { depth_pass_mode = using_giprobe ? PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE : PASS_MODE_DEPTH_NORMAL_ROUGHNESS; // also giprobe using_sdfgi = true; - render_buffer->ensure_gi(); } else { depth_pass_mode = using_giprobe ? PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE : PASS_MODE_DEPTH_NORMAL_ROUGHNESS; } @@ -1767,8 +1671,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf alpha_framebuffer = opaque_framebuffer; } else if (p_reflection_probe.is_valid()) { uint32_t resolution = reflection_probe_instance_get_resolution(p_reflection_probe); - screen_pixel_size.width = 1.0 / resolution; - screen_pixel_size.height = 1.0 / resolution; screen_size.x = resolution; screen_size.y = resolution; @@ -1783,13 +1685,21 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf ERR_FAIL(); //bug? } + RD::get_singleton()->draw_command_begin_label("Render Setup"); + _setup_lightmaps(p_lightmaps, p_cam_transform); - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + _setup_giprobes(p_gi_probes); + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); _update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example) - render_list.clear(); - _fill_render_list(p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi); + _fill_render_list(RENDER_LIST_OPAQUE, p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi, using_sdfgi || using_giprobe, lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + render_list[RENDER_LIST_OPAQUE].sort_by_key(); + render_list[RENDER_LIST_ALPHA].sort_by_depth(); + _fill_instance_data(RENDER_LIST_OPAQUE); + _fill_instance_data(RENDER_LIST_ALPHA); + + RD::get_singleton()->draw_command_end_label(); bool using_sss = !low_end && render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED; @@ -1849,6 +1759,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf // setup sky if used for ambient, reflections, or background if (draw_sky || draw_sky_fog_only || environment_get_reflection_source(p_environment) == RS::ENV_REFLECTION_SOURCE_SKY || environment_get_ambient_source(p_environment) == RS::ENV_AMBIENT_SOURCE_SKY) { RENDER_TIMESTAMP("Setup Sky"); + RD::get_singleton()->draw_command_begin_label("Setup Sky"); CameraMatrix projection = p_cam_projection; if (p_reflection_probe.is_valid()) { CameraMatrix correction; @@ -1866,58 +1777,77 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf // do not try to draw sky if invalid draw_sky = false; } + RD::get_singleton()->draw_command_end_label(); } } else { clear_color = p_default_bg_color; } - render_list.sort_by_key(false); - - _fill_instances(render_list.elements, render_list.element_count, false, false, using_sdfgi || using_giprobe); - bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION; bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES; - bool depth_pre_pass = !low_end && depth_framebuffer.is_valid(); bool using_ssao = depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment); bool continue_depth = false; if (depth_pre_pass) { //depth pre pass - RENDER_TIMESTAMP("Render Depth Pre-Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); + bool needs_pre_resolve = _needs_post_prepass_render(using_sdfgi || using_giprobe); + if (needs_pre_resolve) { + RENDER_TIMESTAMP("GI + Render Depth Pre-Pass (parallel)"); + } else { + RENDER_TIMESTAMP("Render Depth Pre-Pass"); + } + if (needs_pre_resolve) { + //pre clear the depth framebuffer, as AMD (and maybe others?) use compute for it, and barrier other compute shaders. + RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, depth_pass_clear); + RD::get_singleton()->draw_list_end(); + //start compute processes here, so they run at the same time as depth pre-pass + _post_prepass_render(using_sdfgi || using_giprobe); + } + + RD::get_singleton()->draw_command_begin_label("Render Depth Pre-Pass"); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); bool finish_depth = using_ssao || using_sdfgi || using_giprobe; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(depth_framebuffer), render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, depth_framebuffer, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, needs_pre_resolve ? Vector<Color>() : depth_pass_clear); + + RD::get_singleton()->draw_command_end_label(); + + if (needs_pre_resolve) { + _pre_resolve_render(using_sdfgi || using_giprobe); + } if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { RENDER_TIMESTAMP("Resolve Depth Pre-Pass"); + RD::get_singleton()->draw_command_begin_label("Resolve Depth Pre-Pass"); if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE) { + if (needs_pre_resolve) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, RD::BARRIER_MASK_COMPUTE); + } static int texture_samples[RS::VIEWPORT_MSAA_MAX] = { 1, 2, 4, 8, 16 }; storage->get_effects()->resolve_gi(render_buffer->depth_msaa, render_buffer->normal_roughness_buffer_msaa, using_giprobe ? render_buffer->giprobe_buffer_msaa : RID(), render_buffer->depth, render_buffer->normal_roughness_buffer, using_giprobe ? render_buffer->giprobe_buffer : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_samples[render_buffer->msaa]); } else if (finish_depth) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); } + RD::get_singleton()->draw_command_end_label(); } continue_depth = !finish_depth; } - if (using_ssao) { - _process_ssao(p_render_buffer, p_environment, render_buffer->normal_roughness_buffer, p_cam_projection); - } + _pre_opaque_render(using_ssao, using_sdfgi || using_giprobe, render_buffer ? render_buffer->normal_roughness_buffer : RID(), render_buffer ? render_buffer->giprobe_buffer : RID()); - if (using_sdfgi || using_giprobe) { - _process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->ambient_buffer, render_buffer->reflection_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probes); - } + RD::get_singleton()->draw_command_begin_label("Render Opaque Pass"); + + scene_state.ubo.directional_light_count = _get_render_state_directional_light_count(); - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); RENDER_TIMESTAMP("Render Opaque Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_gi_probes); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss; bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss; @@ -1938,17 +1868,17 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(framebuffer), render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); - + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); if (will_continue_color && using_separate_specular) { // close the specular framebuffer, as it's no longer used - draw_list = RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); RD::get_singleton()->draw_list_end(); } } + RD::get_singleton()->draw_command_end_label(); + if (debug_giprobes) { //debug giprobes bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); @@ -1958,9 +1888,11 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf dc.set_depth_correction(true); CameraMatrix cm = (dc * p_cam_projection) * CameraMatrix(p_cam_transform.affine_inverse()); RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(opaque_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::get_singleton()->draw_command_begin_label("Debug GIProbes"); for (int i = 0; i < (int)p_gi_probes.size(); i++) { _debug_giprobe(p_gi_probes[i], draw_list, opaque_framebuffer, cm, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION, 1.0); } + RD::get_singleton()->draw_command_end_label(); RD::get_singleton()->draw_list_end(); } @@ -1973,7 +1905,9 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf dc.set_depth_correction(true); CameraMatrix cm = (dc * p_cam_projection) * CameraMatrix(p_cam_transform.affine_inverse()); RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(opaque_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::get_singleton()->draw_command_begin_label("Debug SDFGI"); _debug_sdfgi_probes(p_render_buffer, draw_list, opaque_framebuffer, cm); + RD::get_singleton()->draw_command_end_label(); RD::get_singleton()->draw_list_end(); } @@ -1986,30 +1920,35 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf correction.set_depth_correction(true); projection = correction * p_cam_projection; } - + RD::get_singleton()->draw_command_begin_label("Draw Sky"); _draw_sky(can_continue_color, can_continue_depth, opaque_framebuffer, p_environment, projection, p_cam_transform); + RD::get_singleton()->draw_command_end_label(); } if (render_buffer && !can_continue_color && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); if (using_separate_specular) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa, render_buffer->specular, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa, render_buffer->specular); } } if (render_buffer && !can_continue_depth && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); } if (using_separate_specular) { if (using_sss) { RENDER_TIMESTAMP("Sub Surface Scattering"); + RD::get_singleton()->draw_command_begin_label("Process Sub Surface Scattering"); _process_sss(p_render_buffer, p_cam_projection); + RD::get_singleton()->draw_command_end_label(); } if (using_ssr) { RENDER_TIMESTAMP("Screen Space Reflection"); + RD::get_singleton()->draw_command_begin_label("Process Screen Space Reflections"); _process_ssr(p_render_buffer, render_buffer->color_fb, render_buffer->normal_roughness_buffer, render_buffer->specular, render_buffer->specular, Color(0, 0, 0, 1), p_environment, p_cam_projection, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED); + RD::get_singleton()->draw_command_end_label(); } else { //just mix specular back RENDER_TIMESTAMP("Merge Specular"); @@ -2019,119 +1958,161 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf RENDER_TIMESTAMP("Render Transparent Pass"); - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + RD::get_singleton()->draw_command_begin_label("Render Transparent Pass"); - render_list.sort_by_reverse_depth_and_priority(true); + rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_ALPHA, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); - _fill_instances(&render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, using_sdfgi); + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); { - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(alpha_framebuffer), &render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); } + RD::get_singleton()->draw_command_end_label(); + + RD::get_singleton()->draw_command_begin_label("Resolve"); + if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); } -} -void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedArray<InstanceBase *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { - RENDER_TIMESTAMP("Setup Rendering Shadow"); + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderForward::_render_shadow_begin() { + scene_state.shadow_passes.clear(); + RD::get_singleton()->draw_command_begin_label("Shadow Setup"); _update_render_base_uniform_set(); - render_pass++; + render_list[RENDER_LIST_SECONDARY].clear(); + scene_state.instance_data[RENDER_LIST_SECONDARY].clear(); +} +void RendererSceneRenderForward::_render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end) { + uint32_t shadow_pass_index = scene_state.shadow_passes.size(); + + SceneState::ShadowPass shadow_pass; scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1; - _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_zfar, false, p_use_pancake); + _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), !p_flip_y, Color(), 0, p_zfar, false, p_use_pancake, shadow_pass_index); if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { p_screen_lod_threshold = 0.0; } - render_list.clear(); - PassMode pass_mode = p_use_dp ? PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW; - _fill_render_list(p_instances, pass_mode, p_projection, p_transform); + uint32_t render_list_from = render_list[RENDER_LIST_SECONDARY].elements.size(); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_projection, p_transform, false, false, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, true); + uint32_t render_list_size = render_list[RENDER_LIST_SECONDARY].elements.size() - render_list_from; + render_list[RENDER_LIST_SECONDARY].sort_by_key_range(render_list_from, render_list_size); + _fill_instance_data(RENDER_LIST_SECONDARY, render_list_from, render_list_size, false); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); + { + //regular forward for now + bool flip_cull = p_use_dp_flip; + if (p_flip_y) { + flip_cull = !flip_cull; + } - RENDER_TIMESTAMP("Render Shadow"); + shadow_pass.element_from = render_list_from; + shadow_pass.element_count = render_list_size; + shadow_pass.flip_cull = flip_cull; + shadow_pass.pass_mode = pass_mode; - render_list.sort_by_key(false); + shadow_pass.rp_uniform_set = RID(); //will be filled later when instance buffer is complete + shadow_pass.camera_plane = p_camera_plane; + shadow_pass.screen_lod_threshold = p_screen_lod_threshold; + shadow_pass.lod_distance_multiplier = p_lod_distance_multiplier; - _fill_instances(render_list.elements, render_list.element_count, true); + shadow_pass.framebuffer = p_framebuffer; + shadow_pass.initial_depth_action = p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION_CONTINUE : RD::INITIAL_ACTION_CONTINUE); + shadow_pass.final_depth_action = p_end ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE; + shadow_pass.rect = p_rect; - { - //regular forward for now - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, p_use_dp_flip, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); + scene_state.shadow_passes.push_back(shadow_pass); } } -void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<InstanceBase *> &p_instances) { - RENDER_TIMESTAMP("Setup Render Collider Heightfield"); +void RendererSceneRenderForward::_render_shadow_process() { + _update_instance_data_buffer(RENDER_LIST_SECONDARY); + //render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time) - _update_render_base_uniform_set(); + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + //render passes need to be configured after instance buffer is done, since they need the latest version + SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i]; + shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>(), false, i); + } - render_pass++; + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderForward::_render_shadow_end(uint32_t p_barrier) { + RD::get_singleton()->draw_command_begin_label("Shadow Render"); - scene_state.ubo.dual_paraboloid_side = 0; + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i]; + RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, true, shadow_pass.rp_uniform_set, false, Vector2(), shadow_pass.camera_plane, shadow_pass.lod_distance_multiplier, shadow_pass.screen_lod_threshold, shadow_pass.element_from, RD::BARRIER_MASK_NO_BARRIER); + _render_list_with_threads(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, shadow_pass.final_depth_action, Vector<Color>(), 1.0, 0, shadow_pass.rect); + } - _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false); + if (p_barrier != RD::BARRIER_MASK_NO_BARRIER) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, p_barrier); + } + RD::get_singleton()->draw_command_end_label(); +} - render_list.clear(); +void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances) { + RENDER_TIMESTAMP("Setup Render Collider Heightfield"); - PassMode pass_mode = PASS_MODE_SHADOW; + RD::get_singleton()->draw_command_begin_label("Render Collider Heightfield"); - _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); + _update_render_base_uniform_set(); + scene_state.ubo.dual_paraboloid_side = 0; + + _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); + PassMode pass_mode = PASS_MODE_SHADOW; - RENDER_TIMESTAMP("Render Collider Heightield"); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - render_list.sort_by_key(false); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); - _fill_instances(render_list.elements, render_list.element_count, true); + RENDER_TIMESTAMP("Render Collider Heightfield"); { //regular forward for now - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_fb), render_list.elements, render_list.element_count, false, pass_mode, true, rp_uniform_set); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, true, rp_uniform_set); + _render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); } + RD::get_singleton()->draw_command_end_label(); } -void RendererSceneRenderForward::_render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { +void RendererSceneRenderForward::_render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering Material"); - _update_render_base_uniform_set(); + RD::get_singleton()->draw_command_begin_label("Render Material"); - render_pass++; + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; - scene_state.ubo.material_uv2_mode = true; - - _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); + scene_state.ubo.material_uv2_mode = false; - render_list.clear(); + _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); - render_list.sort_by_key(false); - - _fill_instances(render_list.elements, render_list.element_count, true); - { + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set); //regular forward for now Vector<Color> clear; clear.push_back(Color(0, 0, 0, 0)); @@ -2140,37 +2121,36 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo clear.push_back(Color(0, 0, 0, 0)); clear.push_back(Color(0, 0, 0, 0)); RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); RD::get_singleton()->draw_list_end(); } + + RD::get_singleton()->draw_command_end_label(); } -void RendererSceneRenderForward::_render_uv2(const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { +void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering UV2"); - _update_render_base_uniform_set(); + RD::get_singleton()->draw_command_begin_label("Render UV2"); - render_pass++; + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; scene_state.ubo.material_uv2_mode = true; - _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); - - render_list.clear(); + _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - _fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform()); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); - render_list.sort_by_key(false); - - _fill_instances(render_list.elements, render_list.element_count, true); - { + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, true); //regular forward for now Vector<Color> clear; clear.push_back(Color(0, 0, 0, 0)); @@ -2198,31 +2178,32 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<InstanceBase *> &p Vector2 ofs = uv_offsets[i]; ofs.x /= p_region.size.width; ofs.y /= p_region.size.height; - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, true, ofs); //first wireframe, for pseudo conservative + render_list_params.uv_offset = ofs; + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //first wireframe, for pseudo conservative } - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); //second regular triangles + render_list_params.uv_offset = Vector2(); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //second regular triangles RD::get_singleton()->draw_list_end(); } + + RD::get_singleton()->draw_command_end_label(); } -void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<InstanceBase *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { +void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { RENDER_TIMESTAMP("Render SDFGI"); + RD::get_singleton()->draw_command_begin_label("Render SDFGI Voxel"); + _update_render_base_uniform_set(); RenderBufferDataForward *render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); ERR_FAIL_COND(!render_buffer); - render_pass++; - render_list.clear(); - PassMode pass_mode = PASS_MODE_SDF; - _fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform()); - render_list.sort_by_key(false); - _fill_instances(render_list.elements, render_list.element_count, true); - - RID rp_uniform_set = _setup_sdfgi_render_pass_uniform_set(p_albedo_texture, p_emission_texture, p_emission_aniso_texture, p_geom_facing_texture); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); Vector3 half_extents = p_bounds.size * 0.5; Vector3 center = p_bounds.position + half_extents; @@ -2273,7 +2254,9 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto RendererStorageRD::store_transform(to_bounds.affine_inverse() * cam_xform, scene_state.ubo.sdf_to_bounds); - _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); + _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); + + RID rp_uniform_set = _setup_sdfgi_render_pass_uniform_set(p_albedo_texture, p_emission_texture, p_emission_aniso_texture, p_geom_facing_texture); Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size); if (!E) { @@ -2281,10 +2264,11 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto E = sdfgi_framebuffer_size_cache.insert(fb_size, fb); } - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(E->get()), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); //second regular triangles - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, false); + _render_list_with_threads(&render_list_params, E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs); } + + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_base_uniforms_changed() { @@ -2336,15 +2320,15 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { RD::Uniform u; u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.ids.push_back(scene_state.uniform_buffer); + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(get_omni_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.ids.push_back(scene_state.instance_buffer); + u.ids.push_back(get_spot_light_buffer()); uniforms.push_back(u); } @@ -2352,48 +2336,33 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { RD::Uniform u; u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.ids.push_back(get_positional_light_buffer()); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.binding = 6; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_reflection_probe_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 7; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(get_directional_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 10; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(scene_state.lightmap_buffer); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 11; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.ids = storage->lightmap_array_get_textures(); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 12; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(scene_state.lightmap_capture_buffer); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 13; + u.binding = 9; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID decal_atlas = storage->decal_atlas_get_texture(); u.ids.push_back(decal_atlas); @@ -2401,7 +2370,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } { RD::Uniform u; - u.binding = 14; + u.binding = 10; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID decal_atlas = storage->decal_atlas_get_texture_srgb(); u.ids.push_back(decal_atlas); @@ -2409,7 +2378,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } { RD::Uniform u; - u.binding = 15; + u.binding = 11; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_decal_buffer()); uniforms.push_back(u); @@ -2417,35 +2386,8 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { RD::Uniform u; - u.binding = 16; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.ids.push_back(get_cluster_builder_texture()); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 17; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.ids.push_back(get_cluster_builder_indices_buffer()); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.binding = 18; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - if (directional_shadow_get_texture().is_valid()) { - u.ids.push_back(directional_shadow_get_texture()); - } else { - u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE)); - } - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 19; + u.binding = 12; u.ids.push_back(storage->global_variables_get_storage_buffer()); uniforms.push_back(u); } @@ -2453,7 +2395,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { if (!low_end) { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 20; + u.binding = 13; u.ids.push_back(sdfgi_get_ubo()); uniforms.push_back(u); } @@ -2462,10 +2404,9 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } } -RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes) { - if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { - RD::get_singleton()->free(render_pass_uniform_set); - } +RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas, int p_index) { + //there should always be enough uniform buffers for render passes, otherwise bugs + ERR_FAIL_INDEX_V(p_index, (int)scene_state.uniform_buffers.size(), RID()); RenderBufferDataForward *rb = nullptr; if (p_render_buffers.is_valid()) { @@ -2477,6 +2418,24 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff Vector<RD::Uniform> uniforms; { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[p_index]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID instance_buffer = scene_state.instance_buffer[p_render_list]; + if (instance_buffer == RID()) { + instance_buffer = default_vec4_xform_buffer; // any buffer will do since its not used + } + u.ids.push_back(instance_buffer); + uniforms.push_back(u); + } + { RID radiance_texture; if (p_radiance_texture.is_valid()) { radiance_texture = p_radiance_texture; @@ -2484,7 +2443,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); } RD::Uniform u; - u.binding = 0; + u.binding = 2; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(radiance_texture); uniforms.push_back(u); @@ -2493,7 +2452,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RID ref_texture = p_reflection_atlas.is_valid() ? reflection_atlas_get_texture(p_reflection_atlas) : RID(); RD::Uniform u; - u.binding = 1; + u.binding = 3; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (ref_texture.is_valid()) { u.ids.push_back(ref_texture); @@ -2505,7 +2464,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 2; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture; if (p_shadow_atlas.is_valid()) { @@ -2517,10 +2476,39 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff u.ids.push_back(texture); uniforms.push_back(u); } + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + if (p_use_directional_shadow_atlas && directional_shadow_get_texture().is_valid()) { + u.ids.push_back(directional_shadow_get_texture()); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 6; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.resize(scene_state.max_lightmaps); + RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); + for (uint32_t i = 0; i < scene_state.max_lightmaps; i++) { + if (i < p_lightmaps.size()) { + RID base = lightmap_instance_get_lightmap(p_lightmaps[i]); + RID texture = storage->lightmap_get_texture(base); + RID rd_texture = storage->texture_get_rd_texture(texture); + u.ids.write[i] = rd_texture; + } else { + u.ids.write[i] = default_tex; + } + } + uniforms.push_back(u); + } { RD::Uniform u; - u.binding = 3; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(MAX_GI_PROBES); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); @@ -2541,7 +2529,16 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 4; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID cb = p_cluster_buffer.is_valid() ? p_cluster_buffer : default_vec4_xform_buffer; + u.ids.push_back(cb); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 9; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2549,17 +2546,18 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 5; + u.binding = 10; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID bbt = rb ? render_buffers_get_back_buffer_texture(p_render_buffers) : RID(); RID texture = bbt.is_valid() ? bbt : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); u.ids.push_back(texture); uniforms.push_back(u); } + if (!low_end) { { RD::Uniform u; - u.binding = 6; + u.binding = 11; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = rb && rb->normal_roughness_buffer.is_valid() ? rb->normal_roughness_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_NORMAL); u.ids.push_back(texture); @@ -2568,7 +2566,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 7; + u.binding = 12; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID aot = rb ? render_buffers_get_ao_texture(p_render_buffers) : RID(); RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2578,24 +2576,26 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 8; + u.binding = 13; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - RID texture = rb && rb->ambient_buffer.is_valid() ? rb->ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + RID ambient_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_ambient_texture(p_render_buffers) : RID(); + RID texture = ambient_buffer.is_valid() ? ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); u.ids.push_back(texture); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 9; + u.binding = 14; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - RID texture = rb && rb->reflection_buffer.is_valid() ? rb->reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + RID reflection_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_reflection_texture(p_render_buffers) : RID(); + RID texture = reflection_buffer.is_valid() ? reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); u.ids.push_back(texture); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 10; + u.binding = 15; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID t; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { @@ -2608,7 +2608,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 11; + u.binding = 16; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers)); @@ -2619,14 +2619,14 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 12; + u.binding = 17; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 13; + u.binding = 18; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID vfog = RID(); if (rb && render_buffers_has_volumetric_fog(p_render_buffers)) { @@ -2642,8 +2642,16 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } } - render_pass_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); - return render_pass_uniform_set; + if (p_index >= (int)render_pass_uniform_sets.size()) { + render_pass_uniform_sets.resize(p_index + 1); + } + + if (render_pass_uniform_sets[p_index].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[p_index])) { + RD::get_singleton()->free(render_pass_uniform_sets[p_index]); + } + + render_pass_uniform_sets[p_index] = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); + return render_pass_uniform_sets[p_index]; } RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture) { @@ -2654,10 +2662,24 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed Vector<RD::Uniform> uniforms; { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(scene_state.instance_buffer[RENDER_LIST_SECONDARY]); + uniforms.push_back(u); + } + { // No radiance texture. RID radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); RD::Uniform u; - u.binding = 0; + u.binding = 2; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(radiance_texture); uniforms.push_back(u); @@ -2667,7 +2689,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed // No reflection atlas. RID ref_texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK); RD::Uniform u; - u.binding = 1; + u.binding = 3; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(ref_texture); uniforms.push_back(u); @@ -2676,7 +2698,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No shadow atlas. RD::Uniform u; - u.binding = 2; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2684,9 +2706,33 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed } { + // No directional shadow atlas. + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + u.ids.push_back(texture); + uniforms.push_back(u); + } + + { + // No Lightmaps + RD::Uniform u; + u.binding = 6; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.resize(scene_state.max_lightmaps); + RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); + for (uint32_t i = 0; i < scene_state.max_lightmaps; i++) { + u.ids.write[i] = default_tex; + } + + uniforms.push_back(u); + } + + { // No GIProbes RD::Uniform u; - u.binding = 3; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(MAX_GI_PROBES); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); @@ -2696,33 +2742,43 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed uniforms.push_back(u); } + + { + RD::Uniform u; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID cb = default_vec4_xform_buffer; + u.ids.push_back(cb); + uniforms.push_back(u); + } + // actual sdfgi stuff { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 4; + u.binding = 9; u.ids.push_back(p_albedo_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 5; + u.binding = 10; u.ids.push_back(p_emission_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 6; + u.binding = 11; u.ids.push_back(p_emission_aniso_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 7; + u.binding = 12; u.ids.push_back(p_geom_facing_texture); uniforms.push_back(u); } @@ -2746,23 +2802,530 @@ RID RendererSceneRenderForward::_render_buffers_get_normal_texture(RID p_render_ return rb->normal_roughness_buffer; } -RID RendererSceneRenderForward::_render_buffers_get_ambient_texture(RID p_render_buffers) { - RenderBufferDataForward *rb = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); +RendererSceneRenderForward *RendererSceneRenderForward::singleton = nullptr; - return rb->ambient_buffer; +void RendererSceneRenderForward::set_time(double p_time, double p_step) { + time = p_time; + RendererSceneRenderRD::set_time(p_time, p_step); } -RID RendererSceneRenderForward::_render_buffers_get_reflection_texture(RID p_render_buffers) { - RenderBufferDataForward *rb = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); +void RendererSceneRenderForward::_geometry_instance_mark_dirty(GeometryInstance *p_geometry_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + if (ginstance->dirty_list_element.in_list()) { + return; + } + + //clear surface caches + GeometryInstanceSurfaceDataCache *surf = ginstance->surface_caches; + + while (surf) { + GeometryInstanceSurfaceDataCache *next = surf->next; + geometry_instance_surface_alloc.free(surf); + surf = next; + } + + ginstance->surface_caches = nullptr; - return rb->reflection_buffer; + geometry_instance_dirty_list.add(&ginstance->dirty_list_element); } -RendererSceneRenderForward *RendererSceneRenderForward::singleton = nullptr; +void RendererSceneRenderForward::_geometry_instance_add_surface_with_material(GeometryInstanceForward *ginstance, uint32_t p_surface, MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh) { + bool has_read_screen_alpha = p_material->shader_data->uses_screen_texture || p_material->shader_data->uses_depth_texture || p_material->shader_data->uses_normal_texture; + bool has_base_alpha = (p_material->shader_data->uses_alpha || has_read_screen_alpha); + bool has_blend_alpha = p_material->shader_data->uses_blend_alpha; + bool has_alpha = has_base_alpha || has_blend_alpha; -void RendererSceneRenderForward::set_time(double p_time, double p_step) { - time = p_time; - RendererSceneRenderRD::set_time(p_time, p_step); + uint32_t flags = 0; + + if (p_material->shader_data->uses_sss) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SUBSURFACE_SCATTERING; + } + + if (p_material->shader_data->uses_screen_texture) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SCREEN_TEXTURE; + } + + if (p_material->shader_data->uses_depth_texture) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_DEPTH_TEXTURE; + } + + if (p_material->shader_data->uses_normal_texture) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_NORMAL_TEXTURE; + } + + if (ginstance->data->cast_double_sided_shaodows) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS; + } + + if (has_alpha || has_read_screen_alpha || p_material->shader_data->depth_draw == ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED) { + //material is only meant for alpha pass + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA; + if (p_material->shader_data->uses_depth_pre_pass && !(p_material->shader_data->depth_draw == ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED)) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH; + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW; + } + } else { + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE; + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH; + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW; + } + + MaterialData *material_shadow = nullptr; + void *surface_shadow = nullptr; + if (!p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_pre_pass) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SHARED_SHADOW_MATERIAL; + material_shadow = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); + + RID shadow_mesh = storage->mesh_get_shadow_mesh(p_mesh); + + if (shadow_mesh.is_valid()) { + surface_shadow = storage->mesh_get_surface(shadow_mesh, p_surface); + } + + } else { + material_shadow = p_material; + } + + GeometryInstanceSurfaceDataCache *sdcache = geometry_instance_surface_alloc.alloc(); + + sdcache->flags = flags; + + sdcache->shader = p_material->shader_data; + sdcache->material_uniform_set = p_material->uniform_set; + sdcache->surface = storage->mesh_get_surface(p_mesh, p_surface); + sdcache->primitive = storage->mesh_surface_get_primitive(sdcache->surface); + sdcache->surface_index = p_surface; + + if (ginstance->data->dirty_dependencies) { + storage->base_update_dependency(p_mesh, &ginstance->data->dependency_tracker); + } + + //shadow + sdcache->shader_shadow = material_shadow->shader_data; + sdcache->material_uniform_set_shadow = material_shadow->uniform_set; + + sdcache->surface_shadow = surface_shadow ? surface_shadow : sdcache->surface; + + sdcache->owner = ginstance; + + sdcache->next = ginstance->surface_caches; + ginstance->surface_caches = sdcache; + + //sortkey + + sdcache->sort.sort_key1 = 0; + sdcache->sort.sort_key2 = 0; + + sdcache->sort.surface_index = p_surface; + sdcache->sort.material_id_low = p_material_id & 0x3FFF; + sdcache->sort.material_id_hi = p_material_id >> 14; + sdcache->sort.shader_id = p_shader_id; + sdcache->sort.geometry_id = p_mesh.get_local_index(); //only meshes can repeat anyway + sdcache->sort.uses_forward_gi = ginstance->can_sdfgi; + sdcache->sort.priority = p_material->priority; +} + +void RendererSceneRenderForward::_geometry_instance_add_surface(GeometryInstanceForward *ginstance, uint32_t p_surface, RID p_material, RID p_mesh) { + RID m_src; + + m_src = ginstance->data->material_override.is_valid() ? ginstance->data->material_override : p_material; + + MaterialData *material = nullptr; + + if (m_src.is_valid()) { + material = (MaterialData *)storage->material_get_data(m_src, RendererStorageRD::SHADER_TYPE_3D); + if (!material || !material->shader_data->valid) { + material = nullptr; + } + } + + if (material) { + if (ginstance->data->dirty_dependencies) { + storage->material_update_dependency(m_src, &ginstance->data->dependency_tracker); + } + } else { + material = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); + m_src = default_material; + } + + ERR_FAIL_COND(!material); + + _geometry_instance_add_surface_with_material(ginstance, p_surface, material, m_src.get_local_index(), storage->material_get_shader_id(m_src), p_mesh); + + while (material->next_pass.is_valid()) { + RID next_pass = material->next_pass; + material = (MaterialData *)storage->material_get_data(next_pass, RendererStorageRD::SHADER_TYPE_3D); + if (!material || !material->shader_data->valid) { + break; + } + if (ginstance->data->dirty_dependencies) { + storage->material_update_dependency(next_pass, &ginstance->data->dependency_tracker); + } + _geometry_instance_add_surface_with_material(ginstance, p_surface, material, next_pass.get_local_index(), storage->material_get_shader_id(next_pass), p_mesh); + } +} + +void RendererSceneRenderForward::_geometry_instance_update(GeometryInstance *p_geometry_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + + if (ginstance->data->dirty_dependencies) { + ginstance->data->dependency_tracker.update_begin(); + } + + //add geometry for drawing + switch (ginstance->data->base_type) { + case RS::INSTANCE_MESH: { + const RID *materials = nullptr; + uint32_t surface_count; + RID mesh = ginstance->data->base; + + materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (materials) { + //if no materials, no surfaces. + const RID *inst_materials = ginstance->data->surface_materials.ptr(); + uint32_t surf_mat_count = ginstance->data->surface_materials.size(); + + for (uint32_t j = 0; j < surface_count; j++) { + RID material = (j < surf_mat_count && inst_materials[j].is_valid()) ? inst_materials[j] : materials[j]; + _geometry_instance_add_surface(ginstance, j, material, mesh); + } + } + + ginstance->instance_count = 1; + + } break; + + case RS::INSTANCE_MULTIMESH: { + RID mesh = storage->multimesh_get_mesh(ginstance->data->base); + if (mesh.is_valid()) { + const RID *materials = nullptr; + uint32_t surface_count; + + materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (materials) { + for (uint32_t j = 0; j < surface_count; j++) { + _geometry_instance_add_surface(ginstance, j, materials[j], mesh); + } + } + + ginstance->instance_count = storage->multimesh_get_instances_to_draw(ginstance->data->base); + } + + } break; +#if 0 + case RS::INSTANCE_IMMEDIATE: { + RasterizerStorageGLES3::Immediate *immediate = storage->immediate_owner.getornull(inst->base); + ERR_CONTINUE(!immediate); + + _add_geometry(immediate, inst, nullptr, -1, p_depth_pass, p_shadow_pass); + + } break; +#endif + case RS::INSTANCE_PARTICLES: { + int draw_passes = storage->particles_get_draw_passes(ginstance->data->base); + + for (int j = 0; j < draw_passes; j++) { + RID mesh = storage->particles_get_draw_pass_mesh(ginstance->data->base, j); + if (!mesh.is_valid()) + continue; + + const RID *materials = nullptr; + uint32_t surface_count; + + materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (materials) { + for (uint32_t k = 0; k < surface_count; k++) { + _geometry_instance_add_surface(ginstance, k, materials[k], mesh); + } + } + } + + ginstance->instance_count = storage->particles_get_amount(ginstance->data->base); + + } break; + + default: { + } + } + + //Fill push constant + + bool store_transform = true; + + if (ginstance->data->base_type == RS::INSTANCE_MULTIMESH) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH; + uint32_t stride; + if (storage->multimesh_get_transform_format(ginstance->data->base) == RS::MULTIMESH_TRANSFORM_2D) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; + stride = 2; + } else { + stride = 3; + } + if (storage->multimesh_uses_colors(ginstance->data->base)) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; + stride += 1; + } + if (storage->multimesh_uses_custom_data(ginstance->data->base)) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; + stride += 1; + } + + ginstance->base_flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); + ginstance->transforms_uniform_set = storage->multimesh_get_3d_uniform_set(ginstance->data->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); + + } else if (ginstance->data->base_type == RS::INSTANCE_PARTICLES) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH; + uint32_t stride; + if (false) { // 2D particles + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; + stride = 2; + } else { + stride = 3; + } + + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; + stride += 1; + + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; + stride += 1; + + ginstance->base_flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); + + if (!storage->particles_is_using_local_coords(ginstance->data->base)) { + store_transform = false; + } + ginstance->transforms_uniform_set = storage->particles_get_instance_buffer_uniform_set(ginstance->data->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); + + } else if (ginstance->data->base_type == RS::INSTANCE_MESH) { + if (storage->skeleton_is_valid(ginstance->data->skeleton)) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_SKELETON; + ginstance->transforms_uniform_set = storage->skeleton_get_3d_uniform_set(ginstance->data->skeleton, default_shader_rd, TRANSFORMS_UNIFORM_SET); + if (ginstance->data->dirty_dependencies) { + storage->skeleton_update_dependency(ginstance->data->skeleton, &ginstance->data->dependency_tracker); + } + } + } + + ginstance->store_transform_cache = store_transform; + ginstance->can_sdfgi = false; + + if (!lightmap_instance_is_valid(ginstance->lightmap_instance) && !low_end) { + if (ginstance->gi_probes[0].is_null() && (ginstance->data->use_baked_light || ginstance->data->use_dynamic_gi)) { + ginstance->can_sdfgi = true; + } + } + + if (ginstance->data->dirty_dependencies) { + ginstance->data->dependency_tracker.update_end(); + ginstance->data->dirty_dependencies = false; + } + + ginstance->dirty_list_element.remove_from_list(); +} + +void RendererSceneRenderForward::_update_dirty_geometry_instances() { + while (geometry_instance_dirty_list.first()) { + _geometry_instance_update(geometry_instance_dirty_list.first()->self()); + } +} + +void RendererSceneRenderForward::_geometry_instance_dependency_changed(RendererStorage::DependencyChangedNotification p_notification, RendererStorage::DependencyTracker *p_tracker) { + switch (p_notification) { + case RendererStorage::DEPENDENCY_CHANGED_MATERIAL: + case RendererStorage::DEPENDENCY_CHANGED_MESH: + case RendererStorage::DEPENDENCY_CHANGED_MULTIMESH: + case RendererStorage::DEPENDENCY_CHANGED_SKELETON_DATA: { + static_cast<RendererSceneRenderForward *>(singleton)->_geometry_instance_mark_dirty(static_cast<GeometryInstance *>(p_tracker->userdata)); + } break; + case RendererStorage::DEPENDENCY_CHANGED_MULTIMESH_VISIBLE_INSTANCES: { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_tracker->userdata); + if (ginstance->data->base_type == RS::INSTANCE_MULTIMESH) { + ginstance->instance_count = static_cast<RendererSceneRenderForward *>(singleton)->storage->multimesh_get_instances_to_draw(ginstance->data->base); + } + } break; + default: { + //rest of notifications of no interest + } break; + } +} +void RendererSceneRenderForward::_geometry_instance_dependency_deleted(const RID &p_dependency, RendererStorage::DependencyTracker *p_tracker) { + static_cast<RendererSceneRenderForward *>(singleton)->_geometry_instance_mark_dirty(static_cast<GeometryInstance *>(p_tracker->userdata)); +} + +RendererSceneRender::GeometryInstance *RendererSceneRenderForward::geometry_instance_create(RID p_base) { + RS::InstanceType type = storage->get_base_type(p_base); + ERR_FAIL_COND_V(!((1 << type) & RS::INSTANCE_GEOMETRY_MASK), nullptr); + + GeometryInstanceForward *ginstance = geometry_instance_alloc.alloc(); + ginstance->data = memnew(GeometryInstanceForward::Data); + + ginstance->data->base = p_base; + ginstance->data->base_type = type; + ginstance->data->dependency_tracker.userdata = ginstance; + ginstance->data->dependency_tracker.changed_callback = _geometry_instance_dependency_changed; + ginstance->data->dependency_tracker.deleted_callback = _geometry_instance_dependency_deleted; + + _geometry_instance_mark_dirty(ginstance); + + return ginstance; +} +void RendererSceneRenderForward::geometry_instance_set_skeleton(GeometryInstance *p_geometry_instance, RID p_skeleton) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->skeleton = p_skeleton; + _geometry_instance_mark_dirty(ginstance); + ginstance->data->dirty_dependencies = true; +} +void RendererSceneRenderForward::geometry_instance_set_material_override(GeometryInstance *p_geometry_instance, RID p_override) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->material_override = p_override; + _geometry_instance_mark_dirty(ginstance); + ginstance->data->dirty_dependencies = true; +} +void RendererSceneRenderForward::geometry_instance_set_surface_materials(GeometryInstance *p_geometry_instance, const Vector<RID> &p_materials) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->surface_materials = p_materials; + _geometry_instance_mark_dirty(ginstance); + ginstance->data->dirty_dependencies = true; +} +void RendererSceneRenderForward::geometry_instance_set_mesh_instance(GeometryInstance *p_geometry_instance, RID p_mesh_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->mesh_instance = p_mesh_instance; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_transform(GeometryInstance *p_geometry_instance, const Transform &p_transform, const AABB &p_aabb, const AABB &p_transformed_aabb) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->transform = p_transform; + ginstance->mirror = p_transform.basis.determinant() < 0; + ginstance->data->aabb = p_aabb; + ginstance->transformed_aabb = p_transformed_aabb; + + Vector3 model_scale_vec = p_transform.basis.get_scale_abs(); + // handle non uniform scale here + + float max_scale = MAX(model_scale_vec.x, MAX(model_scale_vec.y, model_scale_vec.z)); + float min_scale = MIN(model_scale_vec.x, MIN(model_scale_vec.y, model_scale_vec.z)); + ginstance->non_uniform_scale = max_scale >= 0.0 && (min_scale / max_scale) < 0.9; + + ginstance->lod_model_scale = max_scale; +} +void RendererSceneRenderForward::geometry_instance_set_lod_bias(GeometryInstance *p_geometry_instance, float p_lod_bias) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->lod_bias = p_lod_bias; +} +void RendererSceneRenderForward::geometry_instance_set_use_baked_light(GeometryInstance *p_geometry_instance, bool p_enable) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->use_baked_light = p_enable; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_use_dynamic_gi(GeometryInstance *p_geometry_instance, bool p_enable) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->use_dynamic_gi = p_enable; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_use_lightmap(GeometryInstance *p_geometry_instance, RID p_lightmap_instance, const Rect2 &p_lightmap_uv_scale, int p_lightmap_slice_index) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->lightmap_instance = p_lightmap_instance; + ginstance->lightmap_uv_scale = p_lightmap_uv_scale; + ginstance->lightmap_slice_index = p_lightmap_slice_index; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_lightmap_capture(GeometryInstance *p_geometry_instance, const Color *p_sh9) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + if (p_sh9) { + if (ginstance->lightmap_sh == nullptr) { + ginstance->lightmap_sh = geometry_instance_lightmap_sh.alloc(); + } + + copymem(ginstance->lightmap_sh->sh, p_sh9, sizeof(Color) * 9); + } else { + if (ginstance->lightmap_sh != nullptr) { + geometry_instance_lightmap_sh.free(ginstance->lightmap_sh); + ginstance->lightmap_sh = nullptr; + } + } + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_instance_shader_parameters_offset(GeometryInstance *p_geometry_instance, int32_t p_offset) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->shader_parameters_offset = p_offset; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_cast_double_sided_shadows(GeometryInstance *p_geometry_instance, bool p_enable) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + + ginstance->data->cast_double_sided_shaodows = p_enable; + _geometry_instance_mark_dirty(ginstance); +} + +void RendererSceneRenderForward::geometry_instance_set_layer_mask(GeometryInstance *p_geometry_instance, uint32_t p_layer_mask) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->layer_mask = p_layer_mask; +} + +void RendererSceneRenderForward::geometry_instance_free(GeometryInstance *p_geometry_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + if (ginstance->lightmap_sh != nullptr) { + geometry_instance_lightmap_sh.free(ginstance->lightmap_sh); + } + GeometryInstanceSurfaceDataCache *surf = ginstance->surface_caches; + while (surf) { + GeometryInstanceSurfaceDataCache *next = surf->next; + geometry_instance_surface_alloc.free(surf); + surf = next; + } + memdelete(ginstance->data); + geometry_instance_alloc.free(ginstance); +} + +uint32_t RendererSceneRenderForward::geometry_instance_get_pair_mask() { + return (1 << RS::INSTANCE_GI_PROBE); +} +void RendererSceneRenderForward::geometry_instance_pair_light_instances(GeometryInstance *p_geometry_instance, const RID *p_light_instances, uint32_t p_light_instance_count) { +} +void RendererSceneRenderForward::geometry_instance_pair_reflection_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_reflection_probe_instances, uint32_t p_reflection_probe_instance_count) { +} +void RendererSceneRenderForward::geometry_instance_pair_decal_instances(GeometryInstance *p_geometry_instance, const RID *p_decal_instances, uint32_t p_decal_instance_count) { +} + +Transform RendererSceneRenderForward::geometry_instance_get_transform(GeometryInstance *p_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); + ERR_FAIL_COND_V(!ginstance, Transform()); + return ginstance->transform; +} +AABB RendererSceneRenderForward::geometry_instance_get_aabb(GeometryInstance *p_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); + ERR_FAIL_COND_V(!ginstance, AABB()); + return ginstance->data->aabb; +} + +void RendererSceneRenderForward::geometry_instance_pair_gi_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_gi_probe_instances, uint32_t p_gi_probe_instance_count) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + if (p_gi_probe_instance_count > 0) { + ginstance->gi_probes[0] = p_gi_probe_instances[0]; + } else { + ginstance->gi_probes[0] = RID(); + } + + if (p_gi_probe_instance_count > 1) { + ginstance->gi_probes[1] = p_gi_probe_instances[1]; + } else { + ginstance->gi_probes[1] = RID(); + } } RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_storage) : @@ -2788,11 +3351,10 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor { //lightmaps - scene_state.max_lightmaps = storage->lightmap_array_get_size(); + scene_state.max_lightmaps = low_end ? 2 : MAX_LIGHTMAPS; defines += "\n#define MAX_LIGHTMAP_TEXTURES " + itos(scene_state.max_lightmaps) + "\n"; defines += "\n#define MAX_LIGHTMAPS " + itos(scene_state.max_lightmaps) + "\n"; - scene_state.lightmaps = memnew_arr(LightmapData, scene_state.max_lightmaps); scene_state.lightmap_buffer = RD::get_singleton()->storage_buffer_create(sizeof(LightmapData) * scene_state.max_lightmaps); } { @@ -3005,24 +3567,11 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor actions.default_filter = ShaderLanguage::FILTER_LINEAR_MIPMAP; actions.default_repeat = ShaderLanguage::REPEAT_ENABLE; actions.global_buffer_array_variable = "global_variables.data"; - actions.instance_uniform_index_variable = "instances.data[instance_index].instance_uniforms_ofs"; + actions.instance_uniform_index_variable = "draw_call.instance_uniforms_ofs"; shader.compiler.initialize(actions); } - //render list - render_list.max_elements = GLOBAL_DEF_RST("rendering/limits/rendering/max_renderable_elements", (int)128000); - render_list.init(); - render_pass = 0; - - { - scene_state.max_instances = render_list.max_elements; - scene_state.instances = memnew_arr(InstanceData, scene_state.max_instances); - scene_state.instance_buffer = RD::get_singleton()->storage_buffer_create(sizeof(InstanceData) * scene_state.max_instances); - } - - scene_state.uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); - { //default material and shader default_shader = storage->shader_create(); @@ -3068,14 +3617,18 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor sampler.compare_op = RD::COMPARE_OP_LESS; shadow_sampler = RD::get_singleton()->sampler_create(sampler); } + + render_list_thread_threshold = GLOBAL_GET("rendering/forward_renderer/threaded_render_minimum_instances"); } RendererSceneRenderForward::~RendererSceneRenderForward() { directional_shadow_atlas_set_size(0); //clear base uniform set if still valid - if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { - RD::get_singleton()->free(render_pass_uniform_set); + for (uint32_t i = 0; i < render_pass_uniform_sets.size(); i++) { + if (render_pass_uniform_sets[i].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[i])) { + RD::get_singleton()->free(render_pass_uniform_sets[i]); + } } if (sdfgi_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(sdfgi_pass_uniform_set)) { @@ -3094,12 +3647,16 @@ RendererSceneRenderForward::~RendererSceneRenderForward() { storage->free(default_material); { - RD::get_singleton()->free(scene_state.uniform_buffer); - RD::get_singleton()->free(scene_state.instance_buffer); + for (uint32_t i = 0; i < scene_state.uniform_buffers.size(); i++) { + RD::get_singleton()->free(scene_state.uniform_buffers[i]); + } RD::get_singleton()->free(scene_state.lightmap_buffer); RD::get_singleton()->free(scene_state.lightmap_capture_buffer); - memdelete_arr(scene_state.instances); - memdelete_arr(scene_state.lightmaps); + for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) { + if (scene_state.instance_buffer[i] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[i]); + } + } memdelete_arr(scene_state.lightmap_captures); } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h index 4b37f4a391..af78c50fda 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h @@ -31,6 +31,7 @@ #ifndef RENDERING_SERVER_SCENE_RENDER_FORWARD_H #define RENDERING_SERVER_SCENE_RENDER_FORWARD_H +#include "core/templates/paged_allocator.h" #include "servers/rendering/renderer_rd/pipeline_cache_rd.h" #include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" #include "servers/rendering/renderer_rd/renderer_storage_rd.h" @@ -46,7 +47,18 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { enum { SDFGI_MAX_CASCADES = 8, - MAX_GI_PROBES = 8 + MAX_GI_PROBES = 8, + MAX_LIGHTMAPS = 8, + MAX_GI_PROBES_PER_INSTANCE = 2, + INSTANCE_DATA_BUFFER_MIN_SIZE = 4096 + }; + + enum RenderListType { + RENDER_LIST_OPAQUE, //used for opaque objects + RENDER_LIST_ALPHA, //used for transparent objects + RENDER_LIST_SECONDARY, //used for shadows and other objects + RENDER_LIST_MAX + }; /* Scene Shader */ @@ -166,6 +178,8 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { virtual bool is_animated() const; virtual bool casts_shadows() const; virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + ShaderData(); virtual ~ShaderData(); }; @@ -197,14 +211,6 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { return static_cast<RendererSceneRenderForward *>(singleton)->_create_material_func(static_cast<ShaderData *>(p_shader)); } - /* Push Constant */ - - struct PushConstant { - uint32_t index; - uint32_t pad; - float bake_uv2_offset[2]; - }; - /* Framebuffer */ struct RenderBufferDataForward : public RenderBufferData { @@ -216,9 +222,6 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RID normal_roughness_buffer; RID giprobe_buffer; - RID ambient_buffer; - RID reflection_buffer; - RS::ViewportMSAA msaa; RD::TextureSamples texture_samples; @@ -239,7 +242,6 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RID render_sdfgi_uniform_set; void ensure_specular(); - void ensure_gi(); void ensure_giprobe(); void clear(); virtual void configure(RID p_color_buffer, RID p_depth_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa); @@ -252,7 +254,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RID shadow_sampler; RID render_base_uniform_set; - RID render_pass_uniform_set; + LocalVector<RID> render_pass_uniform_sets; RID sdfgi_pass_uniform_set; uint64_t lightmap_texture_array_version = 0xFFFFFFFF; @@ -261,12 +263,61 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { void _render_buffers_clear_uniform_set(RenderBufferDataForward *rb); virtual void _render_buffers_uniform_set_changed(RID p_render_buffers); virtual RID _render_buffers_get_normal_texture(RID p_render_buffers); - virtual RID _render_buffers_get_ambient_texture(RID p_render_buffers); - virtual RID _render_buffers_get_reflection_texture(RID p_render_buffers); void _update_render_base_uniform_set(); RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture); - RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes); + RID _setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false, int p_index = 0); + + enum PassMode { + PASS_MODE_COLOR, + PASS_MODE_COLOR_SPECULAR, + PASS_MODE_COLOR_TRANSPARENT, + PASS_MODE_SHADOW, + PASS_MODE_SHADOW_DP, + PASS_MODE_DEPTH, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE, + PASS_MODE_DEPTH_MATERIAL, + PASS_MODE_SDF, + }; + + struct GeometryInstanceSurfaceDataCache; + struct RenderElementInfo; + + struct RenderListParameters { + GeometryInstanceSurfaceDataCache **elements = nullptr; + RenderElementInfo *element_info = nullptr; + int element_count = 0; + bool reverse_cull = false; + PassMode pass_mode = PASS_MODE_COLOR; + bool no_gi = false; + RID render_pass_uniform_set; + bool force_wireframe = false; + Vector2 uv_offset; + Plane lod_plane; + float lod_distance_multiplier = 0.0; + float screen_lod_threshold = 0.0; + RD::FramebufferFormatID framebuffer_format = 0; + uint32_t element_offset = 0; + uint32_t barrier = RD::BARRIER_MASK_ALL; + + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, uint32_t p_element_offset = 0, uint32_t p_barrier = RD::BARRIER_MASK_ALL) { + elements = p_elements; + element_info = p_element_info; + element_count = p_element_count; + reverse_cull = p_reverse_cull; + pass_mode = p_pass_mode; + no_gi = p_no_gi; + render_pass_uniform_set = p_render_pass_uniform_set; + force_wireframe = p_force_wireframe; + uv_offset = p_uv_offset; + lod_plane = p_lod_plane; + lod_distance_multiplier = p_lod_distance_multiplier; + screen_lod_threshold = p_screen_lod_threshold; + element_offset = p_element_offset; + barrier = p_barrier; + } + }; struct LightmapData { float normal_xform[12]; @@ -292,16 +343,6 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { INSTANCE_DATA_FLAG_SKELETON = 1 << 19, }; - struct InstanceData { - float transform[16]; - float normal_transform[16]; - uint32_t flags; - uint32_t instance_uniforms_ofs; //instance_offset in instancing/skeleton buffer - uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap) - uint32_t mask; - float lightmap_uv_scale[4]; - }; - struct SceneState { struct UBO { float projection_matrix[16]; @@ -313,6 +354,11 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { float viewport_size[2]; float screen_pixel_size[2]; + uint32_t cluster_shift; + uint32_t cluster_width; + uint32_t cluster_type_size; + uint32_t max_cluster_element_count_div_32; + float directional_penumbra_shadow_kernel[128]; //32 vec4s float directional_soft_shadow_kernel[128]; float penumbra_shadow_kernel[128]; @@ -381,214 +427,340 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { uint32_t pancake_shadows; }; + struct PushConstant { + uint32_t base_index; // + uint32_t uv_offset; //packed + uint32_t pad[2]; + }; + + struct InstanceData { + float transform[16]; + uint32_t flags; + uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables + uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint32_t layer_mask; + float lightmap_uv_scale[4]; + }; + UBO ubo; - RID uniform_buffer; + LocalVector<RID> uniform_buffers; - LightmapData *lightmaps; + LightmapData lightmaps[MAX_LIGHTMAPS]; + RID lightmap_ids[MAX_LIGHTMAPS]; + bool lightmap_has_sh[MAX_LIGHTMAPS]; + uint32_t lightmaps_used = 0; uint32_t max_lightmaps; RID lightmap_buffer; + RID instance_buffer[RENDER_LIST_MAX]; + uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 }; + LocalVector<InstanceData> instance_data[RENDER_LIST_MAX]; + LightmapCaptureData *lightmap_captures; uint32_t max_lightmap_captures; RID lightmap_capture_buffer; - RID instance_buffer; - InstanceData *instances; - uint32_t max_instances; + RID giprobe_ids[MAX_GI_PROBES]; + uint32_t giprobes_used = 0; bool used_screen_texture = false; bool used_normal_texture = false; bool used_depth_texture = false; bool used_sss = false; - uint32_t current_shader_index = 0; - uint32_t current_material_index = 0; + + struct ShadowPass { + uint32_t element_from; + uint32_t element_count; + bool flip_cull; + PassMode pass_mode; + + RID rp_uniform_set; + Plane camera_plane; + float lod_distance_multiplier; + float screen_lod_threshold; + + RID framebuffer; + RD::InitialAction initial_depth_action; + RD::FinalAction final_depth_action; + Rect2i rect; + }; + + LocalVector<ShadowPass> shadow_passes; } scene_state; - /* Render List */ + static RendererSceneRenderForward *singleton; - struct RenderList { - int max_elements; - - struct Element { - RendererSceneRender::InstanceBase *instance; - MaterialData *material; - union { - struct { - //from least significant to most significant in sort, TODO: should be endian swapped on big endian - uint64_t geometry_index : 20; - uint64_t material_index : 15; - uint64_t shader_index : 12; - uint64_t uses_instancing : 1; - uint64_t uses_forward_gi : 1; - uint64_t uses_lightmap : 1; - uint64_t depth_layer : 4; - uint64_t priority : 8; - }; - - uint64_t sort_key; + double time; + RID default_shader; + RID default_material; + RID overdraw_material_shader; + RID overdraw_material; + RID wireframe_material_shader; + RID wireframe_material; + RID default_shader_rd; + RID default_shader_sdfgi_rd; + + RID default_vec4_xform_buffer; + RID default_vec4_xform_uniform_set; + + void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0); + void _setup_giprobes(const PagedArray<RID> &p_giprobes); + void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform); + + struct RenderElementInfo { + uint32_t repeat : 22; + uint32_t uses_lightmap : 1; + uint32_t uses_forward_gi : 1; + uint32_t lod_index : 8; + }; + + template <PassMode p_pass_mode> + _FORCE_INLINE_ void _render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); + + void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); + + LocalVector<RD::DrawListID> thread_draw_lists; + void _render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params); + void _render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector<RID> &p_storage_textures = Vector<RID>()); + + uint32_t render_list_thread_threshold = 500; + + void _update_instance_data_buffer(RenderListType p_render_list); + void _fill_instance_data(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true); + void _fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, const Plane &p_lod_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, bool p_append = false); + + Map<Size2i, RID> sdfgi_framebuffer_size_cache; + + struct GeometryInstanceData; + struct GeometryInstanceForward; + + struct GeometryInstanceLightmapSH { + Color sh[9]; + }; + + // Cached data for drawing surfaces + struct GeometryInstanceSurfaceDataCache { + enum { + FLAG_PASS_DEPTH = 1, + FLAG_PASS_OPAQUE = 2, + FLAG_PASS_ALPHA = 4, + FLAG_PASS_SHADOW = 8, + FLAG_USES_SHARED_SHADOW_MATERIAL = 128, + FLAG_USES_SUBSURFACE_SCATTERING = 2048, + FLAG_USES_SCREEN_TEXTURE = 4096, + FLAG_USES_DEPTH_TEXTURE = 8192, + FLAG_USES_NORMAL_TEXTURE = 16384, + FLAG_USES_DOUBLE_SIDED_SHADOWS = 32768, + }; + + union { + struct { + uint64_t lod_index : 8; + uint64_t surface_index : 10; + uint64_t geometry_id : 32; + uint64_t material_id_low : 14; + + uint64_t material_id_hi : 18; + uint64_t shader_id : 32; + uint64_t uses_forward_gi : 1; + uint64_t uses_lightmap : 1; + uint64_t depth_layer : 4; + uint64_t priority : 8; + }; + struct { + uint64_t sort_key1; + uint64_t sort_key2; }; - uint32_t surface_index; + } sort; + + RS::PrimitiveType primitive = RS::PRIMITIVE_MAX; + uint32_t flags = 0; + uint32_t surface_index = 0; + + void *surface = nullptr; + RID material_uniform_set; + ShaderData *shader = nullptr; + + void *surface_shadow = nullptr; + RID material_uniform_set_shadow; + ShaderData *shader_shadow = nullptr; + + GeometryInstanceSurfaceDataCache *next = nullptr; + GeometryInstanceForward *owner = nullptr; + }; + + struct GeometryInstanceForward : public GeometryInstance { + //used during rendering + bool mirror = false; + bool non_uniform_scale = false; + float lod_bias = 0.0; + float lod_model_scale = 1.0; + AABB transformed_aabb; //needed for LOD + float depth = 0; + uint32_t gi_offset_cache = 0; + uint32_t flags_cache = 0; + bool store_transform_cache = true; + int32_t shader_parameters_offset = -1; + uint32_t lightmap_slice_index; + Rect2 lightmap_uv_scale; + uint32_t layer_mask = 1; + RID transforms_uniform_set; + uint32_t instance_count = 0; + RID mesh_instance; + bool can_sdfgi = false; + //used during setup + uint32_t base_flags = 0; + Transform transform; + RID gi_probes[MAX_GI_PROBES_PER_INSTANCE]; + RID lightmap_instance; + GeometryInstanceLightmapSH *lightmap_sh = nullptr; + GeometryInstanceSurfaceDataCache *surface_caches = nullptr; + SelfList<GeometryInstanceForward> dirty_list_element; + + struct Data { + //data used less often goes into regular heap + RID base; + RS::InstanceType base_type; + + RID skeleton; + Vector<RID> surface_materials; + RID material_override; + AABB aabb; + + bool use_dynamic_gi = false; + bool use_baked_light = false; + bool cast_double_sided_shaodows = false; + bool mirror = false; + bool dirty_dependencies = false; + + RendererStorage::DependencyTracker dependency_tracker; }; - Element *base_elements; - Element **elements; + Data *data = nullptr; + + GeometryInstanceForward() : + dirty_list_element(this) {} + }; + + static void _geometry_instance_dependency_changed(RendererStorage::DependencyChangedNotification p_notification, RendererStorage::DependencyTracker *p_tracker); + static void _geometry_instance_dependency_deleted(const RID &p_dependency, RendererStorage::DependencyTracker *p_tracker); - int element_count; - int alpha_element_count; + SelfList<GeometryInstanceForward>::List geometry_instance_dirty_list; + + PagedAllocator<GeometryInstanceForward> geometry_instance_alloc; + PagedAllocator<GeometryInstanceSurfaceDataCache> geometry_instance_surface_alloc; + PagedAllocator<GeometryInstanceLightmapSH> geometry_instance_lightmap_sh; + + void _geometry_instance_add_surface_with_material(GeometryInstanceForward *ginstance, uint32_t p_surface, MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh); + void _geometry_instance_add_surface(GeometryInstanceForward *ginstance, uint32_t p_surface, RID p_material, RID p_mesh); + void _geometry_instance_mark_dirty(GeometryInstance *p_geometry_instance); + void _geometry_instance_update(GeometryInstance *p_geometry_instance); + void _update_dirty_geometry_instances(); + + bool low_end = false; + + /* Render List */ + + struct RenderList { + LocalVector<GeometryInstanceSurfaceDataCache *> elements; + LocalVector<RenderElementInfo> element_info; void clear() { - element_count = 0; - alpha_element_count = 0; + elements.clear(); + element_info.clear(); } //should eventually be replaced by radix struct SortByKey { - _FORCE_INLINE_ bool operator()(const Element *A, const Element *B) const { - return A->sort_key < B->sort_key; + _FORCE_INLINE_ bool operator()(const GeometryInstanceSurfaceDataCache *A, const GeometryInstanceSurfaceDataCache *B) const { + return (A->sort.sort_key2 == B->sort.sort_key2) ? (A->sort.sort_key1 < B->sort.sort_key1) : (A->sort.sort_key2 < B->sort.sort_key2); } }; - void sort_by_key(bool p_alpha) { - SortArray<Element *, SortByKey> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + void sort_by_key() { + SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; + sorter.sort(elements.ptr(), elements.size()); + } + + void sort_by_key_range(uint32_t p_from, uint32_t p_size) { + SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; + sorter.sort(elements.ptr() + p_from, p_size); } struct SortByDepth { - _FORCE_INLINE_ bool operator()(const Element *A, const Element *B) const { - return A->instance->depth < B->instance->depth; + _FORCE_INLINE_ bool operator()(const GeometryInstanceSurfaceDataCache *A, const GeometryInstanceSurfaceDataCache *B) const { + return (A->owner->depth < B->owner->depth); } }; - void sort_by_depth(bool p_alpha) { //used for shadows + void sort_by_depth() { //used for shadows - SortArray<Element *, SortByDepth> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + SortArray<GeometryInstanceSurfaceDataCache *, SortByDepth> sorter; + sorter.sort(elements.ptr(), elements.size()); } struct SortByReverseDepthAndPriority { - _FORCE_INLINE_ bool operator()(const Element *A, const Element *B) const { - uint32_t layer_A = uint32_t(A->priority); - uint32_t layer_B = uint32_t(B->priority); - if (layer_A == layer_B) { - return A->instance->depth > B->instance->depth; - } else { - return layer_A < layer_B; - } + _FORCE_INLINE_ bool operator()(const GeometryInstanceSurfaceDataCache *A, const GeometryInstanceSurfaceDataCache *B) const { + return (A->sort.priority == B->sort.priority) ? (A->owner->depth > B->owner->depth) : (A->sort.priority < B->sort.priority); } }; void sort_by_reverse_depth_and_priority(bool p_alpha) { //used for alpha - SortArray<Element *, SortByReverseDepthAndPriority> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + SortArray<GeometryInstanceSurfaceDataCache *, SortByReverseDepthAndPriority> sorter; + sorter.sort(elements.ptr(), elements.size()); } - _FORCE_INLINE_ Element *add_element() { - if (element_count + alpha_element_count >= max_elements) { - return nullptr; - } - elements[element_count] = &base_elements[element_count]; - return elements[element_count++]; - } - - _FORCE_INLINE_ Element *add_alpha_element() { - if (element_count + alpha_element_count >= max_elements) { - return nullptr; - } - int idx = max_elements - alpha_element_count - 1; - elements[idx] = &base_elements[idx]; - alpha_element_count++; - return elements[idx]; + _FORCE_INLINE_ void add_element(GeometryInstanceSurfaceDataCache *p_element) { + elements.push_back(p_element); } - - void init() { - element_count = 0; - alpha_element_count = 0; - elements = memnew_arr(Element *, max_elements); - base_elements = memnew_arr(Element, max_elements); - for (int i = 0; i < max_elements; i++) { - elements[i] = &base_elements[i]; // assign elements - } - } - - RenderList() { - max_elements = 0; - } - - ~RenderList() { - memdelete_arr(elements); - memdelete_arr(base_elements); - } - }; - - RenderList render_list; - - static RendererSceneRenderForward *singleton; - uint64_t render_pass; - double time; - RID default_shader; - RID default_material; - RID overdraw_material_shader; - RID overdraw_material; - RID wireframe_material_shader; - RID wireframe_material; - RID default_shader_rd; - RID default_shader_sdfgi_rd; - - RID default_vec4_xform_buffer; - RID default_vec4_xform_uniform_set; - - enum PassMode { - PASS_MODE_COLOR, - PASS_MODE_COLOR_SPECULAR, - PASS_MODE_COLOR_TRANSPARENT, - PASS_MODE_SHADOW, - PASS_MODE_SHADOW_DP, - PASS_MODE_DEPTH, - PASS_MODE_DEPTH_NORMAL_ROUGHNESS, - PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE, - PASS_MODE_DEPTH_MATERIAL, - PASS_MODE_SDF, }; - void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false); - void _setup_lightmaps(const PagedArray<InstanceBase *> &p_lightmaps, const Transform &p_cam_transform); - - void _fill_instances(RenderList::Element **p_elements, int p_element_count, bool p_for_depth, bool p_has_sdfgi = false, bool p_has_opaque_gi = false); - void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderList::Element **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0); - _FORCE_INLINE_ void _add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false); - _FORCE_INLINE_ void _add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false); + RenderList render_list[RENDER_LIST_MAX]; - void _fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false); - - Map<Size2i, RID> sdfgi_framebuffer_size_cache; +protected: + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); - bool low_end = false; + virtual void _render_shadow_begin(); + virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true); + virtual void _render_shadow_process(); + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL); -protected: - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); - virtual void _render_shadow(RID p_framebuffer, const PagedArray<InstanceBase *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0); - virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region); - virtual void _render_uv2(const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region); - virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<InstanceBase *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); - virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<InstanceBase *> &p_instances); + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); + virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances); public: + virtual GeometryInstance *geometry_instance_create(RID p_base); + virtual void geometry_instance_set_skeleton(GeometryInstance *p_geometry_instance, RID p_skeleton); + virtual void geometry_instance_set_material_override(GeometryInstance *p_geometry_instance, RID p_override); + virtual void geometry_instance_set_surface_materials(GeometryInstance *p_geometry_instance, const Vector<RID> &p_materials); + virtual void geometry_instance_set_mesh_instance(GeometryInstance *p_geometry_instance, RID p_mesh_instance); + virtual void geometry_instance_set_transform(GeometryInstance *p_geometry_instance, const Transform &p_transform, const AABB &p_aabb, const AABB &p_transformed_aabb); + virtual void geometry_instance_set_layer_mask(GeometryInstance *p_geometry_instance, uint32_t p_layer_mask); + virtual void geometry_instance_set_lod_bias(GeometryInstance *p_geometry_instance, float p_lod_bias); + virtual void geometry_instance_set_use_baked_light(GeometryInstance *p_geometry_instance, bool p_enable); + virtual void geometry_instance_set_use_dynamic_gi(GeometryInstance *p_geometry_instance, bool p_enable); + virtual void geometry_instance_set_use_lightmap(GeometryInstance *p_geometry_instance, RID p_lightmap_instance, const Rect2 &p_lightmap_uv_scale, int p_lightmap_slice_index); + virtual void geometry_instance_set_lightmap_capture(GeometryInstance *p_geometry_instance, const Color *p_sh9); + virtual void geometry_instance_set_instance_shader_parameters_offset(GeometryInstance *p_geometry_instance, int32_t p_offset); + virtual void geometry_instance_set_cast_double_sided_shadows(GeometryInstance *p_geometry_instance, bool p_enable); + + virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance); + virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance); + + virtual void geometry_instance_free(GeometryInstance *p_geometry_instance); + + virtual uint32_t geometry_instance_get_pair_mask(); + virtual void geometry_instance_pair_light_instances(GeometryInstance *p_geometry_instance, const RID *p_light_instances, uint32_t p_light_instance_count); + virtual void geometry_instance_pair_reflection_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_reflection_probe_instances, uint32_t p_reflection_probe_instance_count); + virtual void geometry_instance_pair_decal_instances(GeometryInstance *p_geometry_instance, const RID *p_decal_instances, uint32_t p_decal_instance_count); + virtual void geometry_instance_pair_gi_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_gi_probe_instances, uint32_t p_gi_probe_instance_count); + virtual void set_time(double p_time, double p_step); virtual bool free(RID p_rid); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 8e55dea2b1..09d2c032a8 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -183,13 +183,11 @@ void RendererSceneRenderRD::_create_reflection_importance_sample(ReflectionData void RendererSceneRenderRD::_update_reflection_mipmaps(ReflectionData &rd, int p_start, int p_end) { for (int i = p_start; i < p_end; i++) { - for (int j = 0; j < rd.layers[i].mipmaps.size() - 1; j++) { - for (int k = 0; k < 6; k++) { - RID view = rd.layers[i].mipmaps[j].views[k]; - RID texture = rd.layers[i].mipmaps[j + 1].views[k]; - Size2i size = rd.layers[i].mipmaps[j + 1].size; - storage->get_effects()->make_mipmap(view, texture, size); - } + for (int j = 0; j < rd.layers[i].views.size() - 1; j++) { + RID view = rd.layers[i].views[j]; + RID texture = rd.layers[i].views[j + 1]; + Size2i size = rd.layers[i].mipmaps[j + 1].size; + storage->get_effects()->cubemap_downsample(view, texture, size); } } } @@ -1150,150 +1148,71 @@ void RendererSceneRenderRD::_sdfgi_update_cascades(RID p_render_buffers) { cascade_data[i].pad = 0; } - RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, true); + RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, RD::BARRIER_MASK_COMPUTE); } -void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) { +void RendererSceneRenderRD::_sdfgi_update_light(RID p_render_buffers, RID p_environment) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); if (rb->sdfgi == nullptr) { return; } - Environment *env = environment_owner.getornull(p_environment); - - RENDER_TIMESTAMP(">SDFGI Update Probes"); - - /* Update Cascades UBO */ - _sdfgi_update_cascades(p_render_buffers); - /* Update Dynamic Lights Buffer */ - - RENDER_TIMESTAMP("Update Lights"); - - /* Update dynamic lights */ - - { - int32_t cascade_light_count[SDFGI::MAX_CASCADES]; - - for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { - SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; - - SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; - uint32_t idx = 0; - for (uint32_t j = 0; j < (uint32_t)p_directional_lights.size(); j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - LightInstance *li = light_instance_owner.getornull(p_directional_lights[j]); - ERR_CONTINUE(!li); - - if (storage->light_directional_is_sky_only(li->light)) { - continue; - } - - Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); - dir.y *= rb->sdfgi->y_mult; - dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Color color = storage->light_get_color(li->light); - color = color.to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = RS::LIGHT_DIRECTIONAL; - lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); - lights[idx].has_shadow = storage->light_has_shadow(li->light); - idx++; - } + RD::get_singleton()->draw_command_begin_label("SDFGI Update dynamic Light"); - AABB cascade_aabb; - cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; - cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; + /* Update dynamic light */ - for (uint32_t j = 0; j < p_positional_light_count; j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - LightInstance *li = light_instance_owner.getornull(p_positional_light_instances[j]); - ERR_CONTINUE(!li); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); - uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); - if (i > max_sdfgi_cascade) { - continue; - } + SDGIShader::DirectLightPushConstant push_constant; - if (!cascade_aabb.intersects(li->aabb)) { - continue; - } + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.multibounce = rb->sdfgi->uses_multibounce; + push_constant.y_mult = rb->sdfgi->y_mult; - Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); - //faster to not do this here - //dir.y *= rb->sdfgi->y_mult; - //dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Vector3 pos = li->transform.origin; - pos.y *= rb->sdfgi->y_mult; - lights[idx].position[0] = pos.x; - lights[idx].position[1] = pos.y; - lights[idx].position[2] = pos.z; - Color color = storage->light_get_color(li->light); - color = color.to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = storage->light_get_type(li->light); - lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); - lights[idx].has_shadow = storage->light_has_shadow(li->light); - lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); - lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); - lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); - lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + push_constant.light_count = rb->sdfgi->cascade_dynamic_light_count[i]; + push_constant.cascade = i; - idx++; - } + if (rb->sdfgi->cascades[i].all_dynamic_lights_dirty || sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) { + push_constant.process_offset = 0; + push_constant.process_increment = 1; + } else { + static uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = { + 1, 2, 4, 8, 16 + }; - if (idx > 0) { - RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, true); - } + uint32_t frames_to_update = frames_to_update_table[sdfgi_frames_to_update_light]; - cascade_light_count[i] = idx; + push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update; + push_constant.process_increment = frames_to_update; } + rb->sdfgi->cascades[i].all_dynamic_lights_dirty = false; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); - - SDGIShader::DirectLightPushConstant push_constant; - - push_constant.grid_size[0] = rb->sdfgi->cascade_size; - push_constant.grid_size[1] = rb->sdfgi->cascade_size; - push_constant.grid_size[2] = rb->sdfgi->cascade_size; - push_constant.max_cascades = rb->sdfgi->cascades.size(); - push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; - push_constant.multibounce = rb->sdfgi->uses_multibounce; - push_constant.y_mult = rb->sdfgi->y_mult; - - push_constant.process_offset = 0; - push_constant.process_increment = 1; - - for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { - SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; - push_constant.light_count = cascade_light_count[i]; - push_constant.cascade = i; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DirectLightPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); + } + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_end_label(); +} - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DirectLightPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); - } - RD::get_singleton()->compute_list_end(); +void RendererSceneRenderRD::_sdfgi_update_probes(RID p_render_buffers, RID p_environment) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; } - RENDER_TIMESTAMP("Raytrace"); + RD::get_singleton()->draw_command_begin_label("SDFGI Update Probes"); + + Environment *env = environment_owner.getornull(p_environment); SDGIShader::IntegratePushConstant push_constant; push_constant.grid_size[1] = rb->sdfgi->cascade_size; @@ -1303,7 +1222,7 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; push_constant.history_index = rb->sdfgi->render_pass % rb->sdfgi->history_size; push_constant.history_size = rb->sdfgi->history_size; - static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 8, 16, 32, 64, 96, 128 }; + static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; push_constant.ray_count = ray_count[sdfgi_ray_count]; push_constant.ray_bias = rb->sdfgi->probe_bias; push_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; @@ -1362,7 +1281,7 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi rb->sdfgi->render_pass++; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_PROCESS]); int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; @@ -1376,14 +1295,47 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); + } + + //end later after raster to avoid barriering on layout changes + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderRD::_sdfgi_store_probes(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; } - RD::get_singleton()->compute_list_add_barrier(compute_list); //wait until done + RD::get_singleton()->barrier(RD::BARRIER_MASK_COMPUTE, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_begin_label("SDFGI Store Probes"); + + SDGIShader::IntegratePushConstant push_constant; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.history_index = rb->sdfgi->render_pass % rb->sdfgi->history_size; + push_constant.history_size = rb->sdfgi->history_size; + static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; + push_constant.ray_count = ray_count[sdfgi_ray_count]; + push_constant.ray_bias = rb->sdfgi->probe_bias; + push_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + push_constant.image_size[1] = rb->sdfgi->probe_axis_count; + push_constant.store_ambient_texture = false; + + push_constant.sky_mode = 0; + push_constant.y_mult = rb->sdfgi->y_mult; // Then store values into the lightprobe texture. Separating these steps has a small performance hit, but it allows for multiple bounces RENDER_TIMESTAMP("Average Probes"); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_STORE]); //convert to octahedral to store @@ -1393,20 +1345,22 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { push_constant.cascade = i; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[i].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); } - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); - RENDER_TIMESTAMP("<SDFGI Update Probes"); + RD::get_singleton()->draw_command_end_label(); } - void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used) { r_gi_probes_used = 0; RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); + RD::get_singleton()->draw_command_begin_label("GIProbes Setup"); + RID gi_probe_buffer = render_buffers_get_gi_probe_buffer(p_render_buffers); GI::GIProbeData gi_probe_data[RenderBuffers::MAX_GIPROBES]; @@ -1490,56 +1444,25 @@ void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transfor } if (p_gi_probes.size() > 0) { - RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data, true); + RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data, RD::BARRIER_MASK_COMPUTE); } -} -void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { - RENDER_TIMESTAMP("Render GI"); + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderRD::_pre_process_gi(RID p_render_buffers, const Transform &p_transform) { + // Do the required buffer transfers and setup before the depth-pre pass, this way GI can + // run in parallel during depth-pre pass and shadow rendering. RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); - Environment *env = environment_owner.getornull(p_environment); - GI::PushConstant push_constant; - - push_constant.screen_size[0] = rb->width; - push_constant.screen_size[1] = rb->height; - push_constant.z_near = p_projection.get_z_near(); - push_constant.z_far = p_projection.get_z_far(); - push_constant.orthogonal = p_projection.is_orthogonal(); - push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); - push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); - push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); - push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; - push_constant.use_sdfgi = rb->sdfgi != nullptr; + /* Update Cascades UBO */ - if (env) { - push_constant.ao_color[0] = env->ao_color.r; - push_constant.ao_color[1] = env->ao_color.g; - push_constant.ao_color[2] = env->ao_color.b; - } else { - push_constant.ao_color[0] = 0; - push_constant.ao_color[1] = 0; - push_constant.ao_color[2] = 0; - } + if (rb->sdfgi) { + /* Update general SDFGI Buffer */ - push_constant.cam_rotation[0] = p_transform.basis[0][0]; - push_constant.cam_rotation[1] = p_transform.basis[1][0]; - push_constant.cam_rotation[2] = p_transform.basis[2][0]; - push_constant.cam_rotation[3] = 0; - push_constant.cam_rotation[4] = p_transform.basis[0][1]; - push_constant.cam_rotation[5] = p_transform.basis[1][1]; - push_constant.cam_rotation[6] = p_transform.basis[2][1]; - push_constant.cam_rotation[7] = 0; - push_constant.cam_rotation[8] = p_transform.basis[0][2]; - push_constant.cam_rotation[9] = p_transform.basis[1][2]; - push_constant.cam_rotation[10] = p_transform.basis[2][2]; - push_constant.cam_rotation[11] = 0; + _sdfgi_update_cascades(p_render_buffers); - if (rb->sdfgi) { GI::SDFGIData sdfgi_data; sdfgi_data.grid_size[0] = rb->sdfgi->cascade_size; @@ -1606,9 +1529,172 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough c.to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; } - RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data, true); + RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data, RD::BARRIER_MASK_COMPUTE); + + /* Update dynamic lights in SDFGI cascades */ + + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + + SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; + uint32_t idx = 0; + for (uint32_t j = 0; j < (uint32_t)render_state.sdfgi_update_data->directional_lights->size(); j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->directional_lights->get(j)); + ERR_CONTINUE(!li); + + if (storage->light_directional_is_sky_only(li->light)) { + continue; + } + + Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); + dir.y *= rb->sdfgi->y_mult; + dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = RS::LIGHT_DIRECTIONAL; + lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + + idx++; + } + + AABB cascade_aabb; + cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; + cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; + + for (uint32_t j = 0; j < render_state.sdfgi_update_data->positional_light_count; j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->positional_light_instances[j]); + ERR_CONTINUE(!li); + + uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); + if (i > max_sdfgi_cascade) { + continue; + } + + if (!cascade_aabb.intersects(li->aabb)) { + continue; + } + + Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); + //faster to not do this here + //dir.y *= rb->sdfgi->y_mult; + //dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Vector3 pos = li->transform.origin; + pos.y *= rb->sdfgi->y_mult; + lights[idx].position[0] = pos.x; + lights[idx].position[1] = pos.y; + lights[idx].position[2] = pos.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = storage->light_get_type(li->light); + lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); + lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); + lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); + lights[idx].inv_spot_attenuation = 1.0f / storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + idx++; + } + + if (idx > 0) { + RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, RD::BARRIER_MASK_COMPUTE); + } + + rb->sdfgi->cascade_dynamic_light_count[i] = idx; + } + } +} + +void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { + RD::get_singleton()->draw_command_begin_label("GI Render"); + + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + Environment *env = environment_owner.getornull(p_environment); + + if (rb->ambient_buffer.is_null() || rb->using_half_size_gi != gi.half_resolution) { + if (rb->ambient_buffer.is_valid()) { + RD::get_singleton()->free(rb->ambient_buffer); + RD::get_singleton()->free(rb->reflection_buffer); + } + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = rb->width; + tf.height = rb->height; + if (gi.half_resolution) { + tf.width >>= 1; + tf.height >>= 1; + } + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->using_half_size_gi = gi.half_resolution; + + _render_buffers_uniform_set_changed(p_render_buffers); + } + + GI::PushConstant push_constant; + + push_constant.screen_size[0] = rb->width; + push_constant.screen_size[1] = rb->height; + push_constant.z_near = p_projection.get_z_near(); + push_constant.z_far = p_projection.get_z_far(); + push_constant.orthogonal = p_projection.is_orthogonal(); + push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); + push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); + push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; + push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; + push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); + push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; + + bool use_sdfgi = rb->sdfgi != nullptr; + bool use_giprobes = push_constant.max_giprobes > 0; + + if (env) { + push_constant.ao_color[0] = env->ao_color.r; + push_constant.ao_color[1] = env->ao_color.g; + push_constant.ao_color[2] = env->ao_color.b; + } else { + push_constant.ao_color[0] = 0; + push_constant.ao_color[1] = 0; + push_constant.ao_color[2] = 0; } + push_constant.cam_rotation[0] = p_transform.basis[0][0]; + push_constant.cam_rotation[1] = p_transform.basis[1][0]; + push_constant.cam_rotation[2] = p_transform.basis[2][0]; + push_constant.cam_rotation[3] = 0; + push_constant.cam_rotation[4] = p_transform.basis[0][1]; + push_constant.cam_rotation[5] = p_transform.basis[1][1]; + push_constant.cam_rotation[6] = p_transform.basis[2][1]; + push_constant.cam_rotation[7] = 0; + push_constant.cam_rotation[8] = p_transform.basis[0][2]; + push_constant.cam_rotation[9] = p_transform.basis[1][2]; + push_constant.cam_rotation[10] = p_transform.basis[2][2]; + push_constant.cam_rotation[11] = 0; + if (rb->gi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->gi_uniform_set)) { Vector<RD::Uniform> uniforms; { @@ -1693,7 +1779,7 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; u.binding = 9; - u.ids.push_back(p_ambient_buffer); + u.ids.push_back(rb->ambient_buffer); uniforms.push_back(u); } @@ -1701,7 +1787,7 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; u.binding = 10; - u.ids.push_back(p_reflection_buffer); + u.ids.push_back(rb->reflection_buffer); uniforms.push_back(u); } @@ -1765,12 +1851,26 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough rb->gi_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi.shader.version_get_shader(gi.shader_version, 0), 0); } - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[0]); + GI::Mode mode; + + if (rb->using_half_size_gi) { + mode = (use_sdfgi && use_giprobes) ? GI::MODE_HALF_RES_COMBINED : (use_sdfgi ? GI::MODE_HALF_RES_SDFGI : GI::MODE_HALF_RES_GIPROBE); + } else { + mode = (use_sdfgi && use_giprobes) ? GI::MODE_COMBINED : (use_sdfgi ? GI::MODE_SDFGI : GI::MODE_GIPROBE); + } + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[mode]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GI::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); - RD::get_singleton()->compute_list_end(); + + if (rb->using_half_size_gi) { + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width >> 1, rb->height >> 1, 1); + } else { + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); + } + //do barrier later to allow oeverlap + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //no barriers, let other compute, raster and transfer happen at the same time + RD::get_singleton()->draw_command_end_label(); } RID RendererSceneRenderRD::sky_create() { @@ -2288,7 +2388,7 @@ void RendererSceneRenderRD::_setup_sky(RID p_environment, RID p_render_buffers, } if (light_data_dirty) { - RD::get_singleton()->buffer_update(sky_scene_state.directional_light_buffer, 0, sizeof(SkyDirectionalLightData) * sky_scene_state.max_directional_lights, sky_scene_state.directional_lights, true); + RD::get_singleton()->buffer_update(sky_scene_state.directional_light_buffer, 0, sizeof(SkyDirectionalLightData) * sky_scene_state.max_directional_lights, sky_scene_state.directional_lights); RendererSceneRenderRD::SkyDirectionalLightData *temp = sky_scene_state.last_frame_directional_lights; sky_scene_state.last_frame_directional_lights = sky_scene_state.directional_lights; @@ -2340,7 +2440,7 @@ void RendererSceneRenderRD::_setup_sky(RID p_environment, RID p_render_buffers, sky_scene_state.ubo.fog_light_color[2] = fog_color.b * fog_energy; sky_scene_state.ubo.fog_sun_scatter = environment_get_fog_sun_scatter(p_environment); - RD::get_singleton()->buffer_update(sky_scene_state.uniform_buffer, 0, sizeof(SkySceneState::UBO), &sky_scene_state.ubo, true); + RD::get_singleton()->buffer_update(sky_scene_state.uniform_buffer, 0, sizeof(SkySceneState::UBO), &sky_scene_state.ubo); } void RendererSceneRenderRD::_update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform) { @@ -2674,6 +2774,12 @@ Variant RendererSceneRenderRD::SkyShaderData::get_default_parameter(const String return Variant(); } +RS::ShaderNativeSourceCode RendererSceneRenderRD::SkyShaderData::get_native_source_code() const { + RendererSceneRenderRD *scene_singleton = (RendererSceneRenderRD *)RendererSceneRenderRD::singleton; + + return scene_singleton->sky_shader.shader.version_get_native_source_code(version); +} + RendererSceneRenderRD::SkyShaderData::SkyShaderData() { valid = false; } @@ -3045,7 +3151,7 @@ float RendererSceneRenderRD::environment_get_fog_aerial_perspective(RID p_env) c return env->fog_aerial_perspective; } -void RendererSceneRenderRD::environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, const Color &p_light, float p_light_energy, float p_length, float p_detail_spread, float p_gi_inject, RenderingServer::EnvVolumetricFogShadowFilter p_shadow_filter) { +void RendererSceneRenderRD::environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, const Color &p_light, float p_light_energy, float p_length, float p_detail_spread, float p_gi_inject, bool p_temporal_reprojection, float p_temporal_reprojection_amount) { Environment *env = environment_owner.getornull(p_env); ERR_FAIL_COND(!env); @@ -3059,8 +3165,9 @@ void RendererSceneRenderRD::environment_set_volumetric_fog(RID p_env, bool p_ena env->volumetric_fog_light_energy = p_light_energy; env->volumetric_fog_length = p_length; env->volumetric_fog_detail_spread = p_detail_spread; - env->volumetric_fog_shadow_filter = p_shadow_filter; env->volumetric_fog_gi_inject = p_gi_inject; + env->volumetric_fog_temporal_reprojection = p_temporal_reprojection; + env->volumetric_fog_temporal_reprojection_amount = p_temporal_reprojection_amount; } void RendererSceneRenderRD::environment_set_volumetric_fog_volume_size(int p_size, int p_depth) { @@ -3071,25 +3178,6 @@ void RendererSceneRenderRD::environment_set_volumetric_fog_volume_size(int p_siz void RendererSceneRenderRD::environment_set_volumetric_fog_filter_active(bool p_enable) { volumetric_fog_filter_active = p_enable; } -void RendererSceneRenderRD::environment_set_volumetric_fog_directional_shadow_shrink_size(int p_shrink_size) { - p_shrink_size = nearest_power_of_2_templated(p_shrink_size); - if (volumetric_fog_directional_shadow_shrink == (uint32_t)p_shrink_size) { - return; - } - - _clear_shadow_shrink_stages(directional_shadow.shrink_stages); -} -void RendererSceneRenderRD::environment_set_volumetric_fog_positional_shadow_shrink_size(int p_shrink_size) { - p_shrink_size = nearest_power_of_2_templated(p_shrink_size); - if (volumetric_fog_positional_shadow_shrink == (uint32_t)p_shrink_size) { - return; - } - - for (uint32_t i = 0; i < shadow_atlas_owner.get_rid_count(); i++) { - ShadowAtlas *sa = shadow_atlas_owner.get_ptr_by_index(i); - _clear_shadow_shrink_stages(sa->shrink_stages); - } -} void RendererSceneRenderRD::environment_set_sdfgi_ray_count(RS::EnvironmentSDFGIRayCount p_ray_count) { sdfgi_ray_count = p_ray_count; @@ -3098,6 +3186,9 @@ void RendererSceneRenderRD::environment_set_sdfgi_ray_count(RS::EnvironmentSDFGI void RendererSceneRenderRD::environment_set_sdfgi_frames_to_converge(RS::EnvironmentSDFGIFramesToConverge p_frames) { sdfgi_frames_to_converge = p_frames; } +void RendererSceneRenderRD::environment_set_sdfgi_frames_to_update_light(RS::EnvironmentSDFGIFramesToUpdateLight p_update) { + sdfgi_frames_to_update_light = p_update; +} void RendererSceneRenderRD::environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance) { Environment *env = environment_owner.getornull(p_env); @@ -3227,6 +3318,10 @@ RID RendererSceneRenderRD::reflection_atlas_create() { ra.count = GLOBAL_GET("rendering/quality/reflection_atlas/reflection_count"); ra.size = GLOBAL_GET("rendering/quality/reflection_atlas/reflection_size"); + ra.cluster_builder = memnew(ClusterBuilderRD); + ra.cluster_builder->set_shared(&cluster_builder_shared); + ra.cluster_builder->setup(Size2i(ra.size, ra.size), max_cluster_elements, RID(), RID(), RID()); + return reflection_atlas_owner.make_rid(ra); } @@ -3238,6 +3333,8 @@ void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_ref return; //no changes } + ra->cluster_builder->setup(Size2i(ra->size, ra->size), max_cluster_elements, RID(), RID(), RID()); + ra->size = p_reflection_size; ra->count = p_reflection_count; @@ -3247,7 +3344,6 @@ void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_ref ra->reflection = RID(); RD::get_singleton()->free(ra->depth_buffer); ra->depth_buffer = RID(); - for (int i = 0; i < ra->reflections.size(); i++) { _clear_reflection_data(ra->reflections.write[i].data); if (ra->reflections[i].owner.is_null()) { @@ -3504,13 +3600,28 @@ RID RendererSceneRenderRD::shadow_atlas_create() { return shadow_atlas_owner.make_rid(ShadowAtlas()); } -void RendererSceneRenderRD::shadow_atlas_set_size(RID p_atlas, int p_size) { +void RendererSceneRenderRD::_update_shadow_atlas(ShadowAtlas *shadow_atlas) { + if (shadow_atlas->size > 0 && shadow_atlas->depth.is_null()) { + RD::TextureFormat tf; + tf.format = shadow_atlas->use_16_bits ? RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_D32_SFLOAT; + tf.width = shadow_atlas->size; + tf.height = shadow_atlas->size; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + shadow_atlas->depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + Vector<RID> fb_tex; + fb_tex.push_back(shadow_atlas->depth); + shadow_atlas->fb = RD::get_singleton()->framebuffer_create(fb_tex); + } +} + +void RendererSceneRenderRD::shadow_atlas_set_size(RID p_atlas, int p_size, bool p_16_bits) { ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_atlas); ERR_FAIL_COND(!shadow_atlas); ERR_FAIL_COND(p_size < 0); p_size = next_power_of_2(p_size); - if (p_size == shadow_atlas->size) { + if (p_size == shadow_atlas->size && p_16_bits == shadow_atlas->use_16_bits) { return; } @@ -3518,7 +3629,6 @@ void RendererSceneRenderRD::shadow_atlas_set_size(RID p_atlas, int p_size) { if (shadow_atlas->depth.is_valid()) { RD::get_singleton()->free(shadow_atlas->depth); shadow_atlas->depth = RID(); - _clear_shadow_shrink_stages(shadow_atlas->shrink_stages); } for (int i = 0; i < 4; i++) { //clear subdivisions @@ -3537,16 +3647,7 @@ void RendererSceneRenderRD::shadow_atlas_set_size(RID p_atlas, int p_size) { shadow_atlas->shadow_owners.clear(); shadow_atlas->size = p_size; - - if (shadow_atlas->size) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; - tf.width = shadow_atlas->size; - tf.height = shadow_atlas->size; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - - shadow_atlas->depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } + shadow_atlas->use_16_bits = p_size; } void RendererSceneRenderRD::shadow_atlas_set_quadrant_subdivision(RID p_atlas, int p_quadrant, int p_subdivision) { @@ -3801,10 +3902,24 @@ bool RendererSceneRenderRD::shadow_atlas_update_light(RID p_atlas, RID p_light_i return false; } -void RendererSceneRenderRD::directional_shadow_atlas_set_size(int p_size) { +void RendererSceneRenderRD::_update_directional_shadow_atlas() { + if (directional_shadow.depth.is_null() && directional_shadow.size > 0) { + RD::TextureFormat tf; + tf.format = directional_shadow.use_16_bits ? RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_D32_SFLOAT; + tf.width = directional_shadow.size; + tf.height = directional_shadow.size; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + directional_shadow.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + Vector<RID> fb_tex; + fb_tex.push_back(directional_shadow.depth); + directional_shadow.fb = RD::get_singleton()->framebuffer_create(fb_tex); + } +} +void RendererSceneRenderRD::directional_shadow_atlas_set_size(int p_size, bool p_16_bits) { p_size = nearest_power_of_2_templated(p_size); - if (directional_shadow.size == p_size) { + if (directional_shadow.size == p_size && directional_shadow.use_16_bits == p_16_bits) { return; } @@ -3812,21 +3927,9 @@ void RendererSceneRenderRD::directional_shadow_atlas_set_size(int p_size) { if (directional_shadow.depth.is_valid()) { RD::get_singleton()->free(directional_shadow.depth); - _clear_shadow_shrink_stages(directional_shadow.shrink_stages); directional_shadow.depth = RID(); + _base_uniforms_changed(); } - - if (p_size > 0) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; - tf.width = p_size; - tf.height = p_size; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - - directional_shadow.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } - - _base_uniforms_changed(); } void RendererSceneRenderRD::set_directional_shadow_count(int p_count) { @@ -3946,11 +4049,7 @@ void RendererSceneRenderRD::light_instance_set_shadow_transform(RID p_light_inst LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); ERR_FAIL_COND(!light_instance); - if (storage->light_get_type(light_instance->light) != RS::LIGHT_DIRECTIONAL) { - p_pass = 0; - } - - ERR_FAIL_INDEX(p_pass, 4); + ERR_FAIL_INDEX(p_pass, 6); light_instance->shadow_transform[p_pass].camera = p_projection; light_instance->shadow_transform[p_pass].transform = p_transform; @@ -3996,29 +4095,6 @@ RendererSceneRenderRD::ShadowCubemap *RendererSceneRenderRD::_get_shadow_cubemap return &shadow_cubemaps[p_size]; } -RendererSceneRenderRD::ShadowMap *RendererSceneRenderRD::_get_shadow_map(const Size2i &p_size) { - if (!shadow_maps.has(p_size)) { - ShadowMap sm; - { - RD::TextureFormat tf; - tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; - tf.width = p_size.width; - tf.height = p_size.height; - tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; - - sm.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } - - Vector<RID> fbtex; - fbtex.push_back(sm.depth); - sm.fb = RD::get_singleton()->framebuffer_create(fbtex); - - shadow_maps[p_size] = sm; - } - - return &shadow_maps[p_size]; -} - ////////////////////////// RID RendererSceneRenderRD::decal_instance_create(RID p_decal) { @@ -4035,6 +4111,19 @@ void RendererSceneRenderRD::decal_instance_set_transform(RID p_decal, const Tran ///////////////////////////////// +RID RendererSceneRenderRD::lightmap_instance_create(RID p_lightmap) { + LightmapInstance li; + li.lightmap = p_lightmap; + return lightmap_instance_owner.make_rid(li); +} +void RendererSceneRenderRD::lightmap_instance_set_transform(RID p_lightmap, const Transform &p_transform) { + LightmapInstance *li = lightmap_instance_owner.getornull(p_lightmap); + ERR_FAIL_COND(!li); + li->transform = p_transform; +} + +///////////////////////////////// + RID RendererSceneRenderRD::gi_probe_instance_create(RID p_base) { GIProbeInstance gi_probe; gi_probe.probe = p_base; @@ -4061,7 +4150,7 @@ bool RendererSceneRenderRD::gi_probe_needs_update(RID p_probe) const { return gi_probe->last_probe_version != storage->gi_probe_get_version(gi_probe->probe); } -void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<InstanceBase *> &p_dynamic_objects) { +void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<GeometryInstance *> &p_dynamic_objects) { GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); ERR_FAIL_COND(!gi_probe); @@ -4106,7 +4195,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins gi_probe->texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, levels.size(), 0, 1, false); + RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, levels.size(), 0, 1); { int total_elements = 0; @@ -4397,7 +4486,10 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins } } - dmap.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_lighting_shader_version_shaders[(write && plot) ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT : write ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE : GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_PLOT], 0); + dmap.uniform_set = RD::get_singleton()->uniform_set_create( + uniforms, + giprobe_lighting_shader_version_shaders[(write && plot) ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT : (write ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE : GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_PLOT)], + 0); } gi_probe->dynamic_maps.push_back(dmap); @@ -4415,7 +4507,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins if (gi_probe->has_dynamic_object_data) { //if it has dynamic object data, it needs to be cleared - RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1, true); + RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1); } uint32_t light_count = 0; @@ -4447,8 +4539,8 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins l.color[1] = color.g; l.color[2] = color.b; - l.spot_angle_radians = Math::deg2rad(storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ANGLE)); - l.spot_attenuation = storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + l.cos_spot_angle = Math::cos(Math::deg2rad(storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ANGLE))); + l.inv_spot_attenuation = 1.0f / storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ATTENUATION); Transform xform = light_instance_get_base_transform(light_instance); @@ -4466,7 +4558,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins l.has_shadow = storage->light_has_shadow(light); } - RD::get_singleton()->buffer_update(gi_probe_lights_uniform, 0, sizeof(GIProbeLight) * light_count, gi_probe_lights, true); + RD::get_singleton()->buffer_update(gi_probe_lights_uniform, 0, sizeof(GIProbeLight) * light_count, gi_probe_lights); } } @@ -4578,13 +4670,10 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins //this could probably be better parallelized in compute.. for (int i = 0; i < (int)p_dynamic_objects.size(); i++) { - InstanceBase *instance = p_dynamic_objects[i]; - //not used, so clear - instance->depth_layer = 0; - instance->depth = 0; + GeometryInstance *instance = p_dynamic_objects[i]; //transform aabb to giprobe - AABB aabb = (to_probe_xform * instance->transform).xform(instance->aabb); + AABB aabb = (to_probe_xform * geometry_instance_get_transform(instance)).xform(geometry_instance_get_aabb(instance)); //this needs to wrap to grid resolution to avoid jitter //also extend margin a bit just in case @@ -4834,7 +4923,16 @@ void RendererSceneRenderRD::_debug_giprobe(RID p_gi_probe, RD::DrawListID p_draw } giprobe_debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_debug_shader_version_shaders[0], 0); - RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, giprobe_debug_shader_version_pipelines[p_emission ? GI_PROBE_DEBUG_EMISSION : p_lighting ? (gi_probe->has_dynamic_object_data ? GI_PROBE_DEBUG_LIGHT_FULL : GI_PROBE_DEBUG_LIGHT) : GI_PROBE_DEBUG_COLOR].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); + + int giprobe_debug_pipeline = GI_PROBE_DEBUG_COLOR; + if (p_emission) { + giprobe_debug_pipeline = GI_PROBE_DEBUG_EMISSION; + } else if (p_lighting) { + giprobe_debug_pipeline = gi_probe->has_dynamic_object_data ? GI_PROBE_DEBUG_LIGHT_FULL : GI_PROBE_DEBUG_LIGHT; + } + RD::get_singleton()->draw_list_bind_render_pipeline( + p_draw_list, + giprobe_debug_shader_version_pipelines[giprobe_debug_pipeline].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, giprobe_debug_uniform_set, 0); RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(GIProbeDebugPushConstant)); RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, 36); @@ -4861,7 +4959,7 @@ void RendererSceneRenderRD::_debug_sdfgi_probes(RID p_render_buffers, RD::DrawLi push_constant.band_power = 4; push_constant.sections_in_band = ((band_points / 2) - 1); push_constant.band_mask = band_points - 2; - push_constant.section_arc = (Math_PI * 2.0) / float(push_constant.sections_in_band); + push_constant.section_arc = Math_TAU / float(push_constant.sections_in_band); push_constant.y_mult = rb->sdfgi->y_mult; uint32_t total_points = push_constant.sections_in_band * band_points; @@ -5082,9 +5180,6 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { RD::get_singleton()->free(rb->luminance.reduce[i]); } - for (int i = 0; i < rb->luminance.reduce.size(); i++) { - RD::get_singleton()->free(rb->luminance.reduce[i]); - } rb->luminance.reduce.clear(); if (rb->luminance.current.is_valid()) { @@ -5125,6 +5220,13 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { RD::get_singleton()->free(rb->ssr.normal_scaled); rb->ssr.normal_scaled = RID(); } + + if (rb->ambient_buffer.is_valid()) { + RD::get_singleton()->free(rb->ambient_buffer); + RD::get_singleton()->free(rb->reflection_buffer); + rb->ambient_buffer = RID(); + rb->reflection_buffer = RID(); + } } void RendererSceneRenderRD::_process_sss(RID p_render_buffers, const CameraMatrix &p_camera) { @@ -5257,9 +5359,11 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen tf.array_layers = 4; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; rb->ssao.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.depth, "SSAO Depth"); for (uint32_t i = 0; i < tf.mipmaps; i++) { RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.depth, 0, i, RD::TEXTURE_SLICE_2D_ARRAY); rb->ssao.depth_slices.push_back(slice); + RD::get_singleton()->set_resource_name(rb->ssao.depth_slices[i], "SSAO Depth Mip " + itos(i) + " "); } } @@ -5272,9 +5376,11 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen tf.array_layers = 4; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; rb->ssao.ao_deinterleaved = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.ao_deinterleaved, "SSAO De-interleaved Array"); for (uint32_t i = 0; i < 4; i++) { RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.ao_deinterleaved, i, 0); rb->ssao.ao_deinterleaved_slices.push_back(slice); + RD::get_singleton()->set_resource_name(rb->ssao.ao_deinterleaved_slices[i], "SSAO De-interleaved Array Layer " + itos(i) + " "); } } @@ -5287,9 +5393,11 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen tf.array_layers = 4; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; rb->ssao.ao_pong = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.ao_pong, "SSAO De-interleaved Array Pong"); for (uint32_t i = 0; i < 4; i++) { RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.ao_pong, i, 0); rb->ssao.ao_pong_slices.push_back(slice); + RD::get_singleton()->set_resource_name(rb->ssao.ao_deinterleaved_slices[i], "SSAO De-interleaved Array Layer " + itos(i) + " Pong"); } } @@ -5300,7 +5408,9 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen tf.height = half_height; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; rb->ssao.importance_map[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.importance_map[0], "SSAO Importance Map"); rb->ssao.importance_map[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.importance_map[1], "SSAO Importance Map Pong"); } { RD::TextureFormat tf; @@ -5309,6 +5419,7 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen tf.height = rb->height; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; rb->ssao.ao_final = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.ao_final, "SSAO Final"); _render_buffers_uniform_set_changed(p_render_buffers); } ssao_using_half_size = ssao_half_size; @@ -5329,9 +5440,9 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen settings.blur_passes = ssao_blur_passes; settings.fadeout_from = ssao_fadeout_from; settings.fadeout_to = ssao_fadeout_to; - settings.screen_size = Size2i(rb->width, rb->height); + settings.full_screen_size = Size2i(rb->width, rb->height); settings.half_screen_size = Size2i(buffer_width, buffer_height); - settings.quarter_size = Size2i(half_width, half_height); + settings.quarter_screen_size = Size2i(half_width, half_height); storage->get_effects()->generate_ssao(rb->depth_texture, p_normal_buffer, rb->ssao.depth, rb->ssao.depth_slices, rb->ssao.ao_deinterleaved, rb->ssao.ao_deinterleaved_slices, rb->ssao.ao_pong, rb->ssao.ao_pong_slices, rb->ssao.ao_final, rb->ssao.importance_map[0], rb->ssao.importance_map[1], p_projection, settings, uniform_sets_are_invalid); } @@ -5524,10 +5635,10 @@ void RendererSceneRenderRD::_render_buffers_debug_draw(RID p_render_buffers, RID effects->copy_to_fb_rect(_render_buffers_get_normal_texture(p_render_buffers), storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false); } - if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_GI_BUFFER && _render_buffers_get_ambient_texture(p_render_buffers).is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_GI_BUFFER && rb->ambient_buffer.is_valid()) { Size2 rtsize = storage->render_target_get_size(rb->render_target); - RID ambient_texture = _render_buffers_get_ambient_texture(p_render_buffers); - RID reflection_texture = _render_buffers_get_reflection_texture(p_render_buffers); + RID ambient_texture = rb->ambient_buffer; + RID reflection_texture = rb->reflection_buffer; effects->copy_to_fb_rect(ambient_texture, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false, false, true, reflection_texture); } } @@ -5683,7 +5794,7 @@ void RendererSceneRenderRD::_sdfgi_debug_draw(RID p_render_buffers, const Camera RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DebugPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); RD::get_singleton()->compute_list_end(); Size2 rtsize = storage->render_target_get_size(rb->render_target); @@ -5719,6 +5830,17 @@ RID RendererSceneRenderRD::render_buffers_get_default_gi_probe_buffer() { return default_giprobe_buffer; } +RID RendererSceneRenderRD::render_buffers_get_gi_ambient_texture(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + return rb->ambient_buffer; +} +RID RendererSceneRenderRD::render_buffers_get_gi_reflection_texture(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + return rb->reflection_buffer; +} + uint32_t RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_count(RID p_render_buffers) const { const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND_V(!rb, 0); @@ -5856,6 +5978,11 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p rb->msaa = p_msaa; rb->screen_space_aa = p_screen_space_aa; rb->use_debanding = p_use_debanding; + if (rb->cluster_builder == nullptr) { + rb->cluster_builder = memnew(ClusterBuilderRD); + } + rb->cluster_builder->set_shared(&cluster_builder_shared); + _free_render_buffer_data(rb); { @@ -5896,6 +6023,12 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p rb->data->configure(rb->texture, rb->depth_texture, p_width, p_height, p_msaa); _render_buffers_uniform_set_changed(p_render_buffers); + + rb->cluster_builder->setup(Size2i(p_width, p_height), max_cluster_elements, rb->depth_texture, storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED), rb->texture); +} + +void RendererSceneRenderRD::gi_set_use_half_resolution(bool p_enable) { + gi.half_resolution = p_enable; } void RendererSceneRenderRD::sub_surface_scattering_set_quality(RS::SubSurfaceScatteringQuality p_quality) { @@ -6006,17 +6139,34 @@ RendererSceneRenderRD::RenderBufferData *RendererSceneRenderRD::render_buffers_g } void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment) { + cluster.reflection_count = 0; + for (uint32_t i = 0; i < (uint32_t)p_reflections.size(); i++) { - RID rpi = p_reflections[i]; + if (cluster.reflection_count == cluster.max_reflections) { + break; + } - if (i >= cluster.max_reflections) { - reflection_probe_instance_set_render_index(rpi, 0); //invalid, but something needs to be set + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflections[i]); + if (!rpi) { continue; } - reflection_probe_instance_set_render_index(rpi, i); + cluster.reflection_sort[cluster.reflection_count].instance = rpi; + cluster.reflection_sort[cluster.reflection_count].depth = -p_camera_inverse_transform.xform(rpi->transform.origin).z; + cluster.reflection_count++; + } + + if (cluster.reflection_count > 0) { + SortArray<Cluster::InstanceSort<ReflectionProbeInstance>> sort_array; + sort_array.sort(cluster.reflection_sort, cluster.reflection_count); + } + + for (uint32_t i = 0; i < cluster.reflection_count; i++) { + ReflectionProbeInstance *rpi = cluster.reflection_sort[i].instance; - RID base_probe = reflection_probe_instance_get_probe(rpi); + rpi->render_index = i; + + RID base_probe = rpi->probe; Cluster::ReflectionData &reflection_ubo = cluster.reflections[i]; @@ -6025,7 +6175,7 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti reflection_ubo.box_extents[0] = extents.x; reflection_ubo.box_extents[1] = extents.y; reflection_ubo.box_extents[2] = extents.z; - reflection_ubo.index = reflection_probe_instance_get_atlas_index(rpi); + reflection_ubo.index = rpi->atlas_index; Vector3 origin_offset = storage->reflection_probe_get_origin_offset(base_probe); @@ -6034,46 +6184,50 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti reflection_ubo.box_offset[2] = origin_offset.z; reflection_ubo.mask = storage->reflection_probe_get_cull_mask(base_probe); - float intensity = storage->reflection_probe_get_intensity(base_probe); - bool interior = storage->reflection_probe_is_interior(base_probe); - bool box_projection = storage->reflection_probe_is_box_projection(base_probe); + reflection_ubo.intensity = storage->reflection_probe_get_intensity(base_probe); + reflection_ubo.ambient_mode = storage->reflection_probe_get_ambient_mode(base_probe); - reflection_ubo.params[0] = intensity; - reflection_ubo.params[1] = 0; - reflection_ubo.params[2] = interior ? 1.0 : 0.0; - reflection_ubo.params[3] = box_projection ? 1.0 : 0.0; + reflection_ubo.exterior = !storage->reflection_probe_is_interior(base_probe); + reflection_ubo.box_project = storage->reflection_probe_is_box_projection(base_probe); Color ambient_linear = storage->reflection_probe_get_ambient_color(base_probe).to_linear(); float interior_ambient_energy = storage->reflection_probe_get_ambient_color_energy(base_probe); - uint32_t ambient_mode = storage->reflection_probe_get_ambient_mode(base_probe); reflection_ubo.ambient[0] = ambient_linear.r * interior_ambient_energy; reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy; reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy; - reflection_ubo.ambient_mode = ambient_mode; - Transform transform = reflection_probe_instance_get_transform(rpi); + Transform transform = rpi->transform; Transform proj = (p_camera_inverse_transform * transform).inverse(); RendererStorageRD::store_transform(proj, reflection_ubo.local_matrix); - cluster.builder.add_reflection_probe(transform, extents); + current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_REFLECTION_PROBE, transform, extents); - reflection_probe_instance_set_render_pass(rpi, RSG::rasterizer->get_frame_number()); + rpi->last_pass = RSG::rasterizer->get_frame_number(); } - if (p_reflections.size()) { - RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, MIN(cluster.max_reflections, (unsigned int)p_reflections.size()) * sizeof(ReflectionData), cluster.reflections, true); + if (cluster.reflection_count) { + RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } -void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) { - uint32_t light_count = 0; +void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) { + Transform inverse_transform = p_camera_transform.affine_inverse(); + r_directional_light_count = 0; r_positional_light_count = 0; sky_scene_state.ubo.directional_light_count = 0; + Plane camera_plane(p_camera_transform.origin, -p_camera_transform.basis.get_axis(Vector3::AXIS_Z).normalized()); + + cluster.omni_light_count = 0; + cluster.spot_light_count = 0; + for (int i = 0; i < (int)p_lights.size(); i++) { - RID li = p_lights[i]; - RID base = light_instance_get_base_light(li); + LightInstance *li = light_instance_owner.getornull(p_lights[i]); + if (!li) { + continue; + } + RID base = li->light; ERR_CONTINUE(base.is_null()); @@ -6083,7 +6237,7 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const // Copy to SkyDirectionalLightData if (r_directional_light_count < sky_scene_state.max_directional_lights) { SkyDirectionalLightData &sky_light_data = sky_scene_state.directional_lights[r_directional_light_count]; - Transform light_transform = light_instance_get_base_transform(li); + Transform light_transform = li->transform; Vector3 world_direction = light_transform.basis.xform(Vector3(0, 0, 1)).normalized(); sky_light_data.direction[0] = world_direction.x; @@ -6119,9 +6273,9 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Cluster::DirectionalLightData &light_data = cluster.directional_lights[r_directional_light_count]; - Transform light_transform = light_instance_get_base_transform(li); + Transform light_transform = li->transform; - Vector3 direction = p_camera_inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, 1))).normalized(); + Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, 1))).normalized(); light_data.direction[0] = direction.x; light_data.direction[1] = direction.y; @@ -6200,28 +6354,28 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const int limit = smode == RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL ? 0 : (smode == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS ? 1 : 3); light_data.blend_splits = storage->light_directional_get_blend_splits(base); for (int j = 0; j < 4; j++) { - Rect2 atlas_rect = light_instance_get_directional_shadow_atlas_rect(li, j); - CameraMatrix matrix = light_instance_get_shadow_camera(li, j); - float split = light_instance_get_directional_shadow_split(li, MIN(limit, j)); + Rect2 atlas_rect = li->shadow_transform[j].atlas_rect; + CameraMatrix matrix = li->shadow_transform[j].camera; + float split = li->shadow_transform[MIN(limit, j)].split; CameraMatrix bias; bias.set_light_bias(); CameraMatrix rectm; rectm.set_light_atlas_rect(atlas_rect); - Transform modelview = (p_camera_inverse_transform * light_instance_get_shadow_transform(li, j)).inverse(); + Transform modelview = (inverse_transform * li->shadow_transform[j].transform).inverse(); CameraMatrix shadow_mtx = rectm * bias * matrix * modelview; light_data.shadow_split_offsets[j] = split; - float bias_scale = light_instance_get_shadow_bias_scale(li, j); + float bias_scale = li->shadow_transform[j].bias_scale; light_data.shadow_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * bias_scale; - light_data.shadow_normal_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * light_instance_get_directional_shadow_texel_size(li, j); + light_data.shadow_normal_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * li->shadow_transform[j].shadow_texel_size; light_data.shadow_transmittance_bias[j] = storage->light_get_transmittance_bias(base) * bias_scale; - light_data.shadow_z_range[j] = light_instance_get_shadow_range(li, j); - light_data.shadow_range_begin[j] = light_instance_get_shadow_range_begin(li, j); + light_data.shadow_z_range[j] = li->shadow_transform[j].farplane; + light_data.shadow_range_begin[j] = li->shadow_transform[j].range_begin; RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrices[j]); - Vector2 uv_scale = light_instance_get_shadow_uv_scale(li, j); + Vector2 uv_scale = li->shadow_transform[j].uv_scale; uv_scale *= atlas_rect.size; //adapt to atlas size switch (j) { case 0: { @@ -6258,170 +6412,201 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const r_directional_light_count++; } break; - case RS::LIGHT_SPOT: case RS::LIGHT_OMNI: { - if (light_count >= cluster.max_lights) { + if (cluster.omni_light_count >= cluster.max_lights) { continue; } - Transform light_transform = light_instance_get_base_transform(li); + cluster.omni_light_sort[cluster.omni_light_count].instance = li; + cluster.omni_light_sort[cluster.omni_light_count].depth = camera_plane.distance_to(li->transform.origin); + cluster.omni_light_count++; + } break; + case RS::LIGHT_SPOT: { + if (cluster.spot_light_count >= cluster.max_lights) { + continue; + } - Cluster::LightData &light_data = cluster.lights[light_count]; - cluster.lights_instances[light_count] = li; + cluster.spot_light_sort[cluster.spot_light_count].instance = li; + cluster.spot_light_sort[cluster.spot_light_count].depth = camera_plane.distance_to(li->transform.origin); + cluster.spot_light_count++; + } break; + } - float sign = storage->light_is_negative(base) ? -1 : 1; - Color linear_col = storage->light_get_color(base).to_linear(); + li->last_pass = RSG::rasterizer->get_frame_number(); + } - light_data.attenuation_energy[0] = Math::make_half_float(storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION)); - light_data.attenuation_energy[1] = Math::make_half_float(sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI); + if (cluster.omni_light_count) { + SortArray<Cluster::InstanceSort<LightInstance>> sorter; + sorter.sort(cluster.omni_light_sort, cluster.omni_light_count); + } - light_data.color_specular[0] = MIN(uint32_t(linear_col.r * 255), 255); - light_data.color_specular[1] = MIN(uint32_t(linear_col.g * 255), 255); - light_data.color_specular[2] = MIN(uint32_t(linear_col.b * 255), 255); - light_data.color_specular[3] = MIN(uint32_t(storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR) * 255), 255); + if (cluster.spot_light_count) { + SortArray<Cluster::InstanceSort<LightInstance>> sorter; + sorter.sort(cluster.spot_light_sort, cluster.spot_light_count); + } - float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE)); - light_data.inv_radius = 1.0 / radius; + ShadowAtlas *shadow_atlas = nullptr; - Vector3 pos = p_camera_inverse_transform.xform(light_transform.origin); + if (p_shadow_atlas.is_valid() && p_using_shadows) { + shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + } - light_data.position[0] = pos.x; - light_data.position[1] = pos.y; - light_data.position[2] = pos.z; + for (uint32_t i = 0; i < (cluster.omni_light_count + cluster.spot_light_count); i++) { + uint32_t index = (i < cluster.omni_light_count) ? i : i - (cluster.omni_light_count); + Cluster::LightData &light_data = (i < cluster.omni_light_count) ? cluster.omni_lights[index] : cluster.spot_lights[index]; + RS::LightType type = (i < cluster.omni_light_count) ? RS::LIGHT_OMNI : RS::LIGHT_SPOT; + LightInstance *li = (i < cluster.omni_light_count) ? cluster.omni_light_sort[index].instance : cluster.spot_light_sort[index].instance; + RID base = li->light; - Vector3 direction = p_camera_inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, -1))).normalized(); + Transform light_transform = li->transform; - light_data.direction[0] = direction.x; - light_data.direction[1] = direction.y; - light_data.direction[2] = direction.z; + float sign = storage->light_is_negative(base) ? -1 : 1; + Color linear_col = storage->light_get_color(base).to_linear(); - float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); + light_data.attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION); - light_data.size = size; + float energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI; - light_data.cone_attenuation_angle[0] = Math::make_half_float(storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION)); - float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE); - light_data.cone_attenuation_angle[1] = Math::make_half_float(Math::cos(Math::deg2rad(spot_angle))); + light_data.color[0] = linear_col.r * energy; + light_data.color[1] = linear_col.g * energy; + light_data.color[2] = linear_col.b * energy; + light_data.specular_amount = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR) * 2.0; - light_data.mask = storage->light_get_cull_mask(base); + float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE)); + light_data.inv_radius = 1.0 / radius; - light_data.atlas_rect[0] = 0; - light_data.atlas_rect[1] = 0; - light_data.atlas_rect[2] = 0; - light_data.atlas_rect[3] = 0; + Vector3 pos = inverse_transform.xform(light_transform.origin); - RID projector = storage->light_get_projector(base); + light_data.position[0] = pos.x; + light_data.position[1] = pos.y; + light_data.position[2] = pos.z; - if (projector.is_valid()) { - Rect2 rect = storage->decal_atlas_get_texture_rect(projector); + Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, -1))).normalized(); - if (type == RS::LIGHT_SPOT) { - light_data.projector_rect[0] = rect.position.x; - light_data.projector_rect[1] = rect.position.y + rect.size.height; //flip because shadow is flipped - light_data.projector_rect[2] = rect.size.width; - light_data.projector_rect[3] = -rect.size.height; - } else { - light_data.projector_rect[0] = rect.position.x; - light_data.projector_rect[1] = rect.position.y; - light_data.projector_rect[2] = rect.size.width; - light_data.projector_rect[3] = rect.size.height * 0.5; //used by dp, so needs to be half - } - } else { - light_data.projector_rect[0] = 0; - light_data.projector_rect[1] = 0; - light_data.projector_rect[2] = 0; - light_data.projector_rect[3] = 0; - } + light_data.direction[0] = direction.x; + light_data.direction[1] = direction.y; + light_data.direction[2] = direction.z; - if (p_using_shadows && p_shadow_atlas.is_valid() && shadow_atlas_owns_light_instance(p_shadow_atlas, li)) { - // fill in the shadow information + float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); - Color shadow_color = storage->light_get_shadow_color(base); + light_data.size = size; - light_data.shadow_color_enabled[0] = MIN(uint32_t(shadow_color.r * 255), 255); - light_data.shadow_color_enabled[1] = MIN(uint32_t(shadow_color.g * 255), 255); - light_data.shadow_color_enabled[2] = MIN(uint32_t(shadow_color.b * 255), 255); - light_data.shadow_color_enabled[3] = 255; + light_data.inv_spot_attenuation = 1.0f / storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION); + float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE); + light_data.cos_spot_angle = Math::cos(Math::deg2rad(spot_angle)); - if (type == RS::LIGHT_SPOT) { - light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0); - float shadow_texel_size = Math::tan(Math::deg2rad(spot_angle)) * radius * 2.0; - shadow_texel_size *= light_instance_get_shadow_texel_size(li, p_shadow_atlas); + light_data.mask = storage->light_get_cull_mask(base); - light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size; + light_data.atlas_rect[0] = 0; + light_data.atlas_rect[1] = 0; + light_data.atlas_rect[2] = 0; + light_data.atlas_rect[3] = 0; - } else { //omni - light_data.shadow_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0; - float shadow_texel_size = light_instance_get_shadow_texel_size(li, p_shadow_atlas); - light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size * 2.0; // applied in -1 .. 1 space - } + RID projector = storage->light_get_projector(base); - light_data.transmittance_bias = storage->light_get_transmittance_bias(base); + if (projector.is_valid()) { + Rect2 rect = storage->decal_atlas_get_texture_rect(projector); - Rect2 rect = light_instance_get_shadow_atlas_rect(li, p_shadow_atlas); + if (type == RS::LIGHT_SPOT) { + light_data.projector_rect[0] = rect.position.x; + light_data.projector_rect[1] = rect.position.y + rect.size.height; //flip because shadow is flipped + light_data.projector_rect[2] = rect.size.width; + light_data.projector_rect[3] = -rect.size.height; + } else { + light_data.projector_rect[0] = rect.position.x; + light_data.projector_rect[1] = rect.position.y; + light_data.projector_rect[2] = rect.size.width; + light_data.projector_rect[3] = rect.size.height * 0.5; //used by dp, so needs to be half + } + } else { + light_data.projector_rect[0] = 0; + light_data.projector_rect[1] = 0; + light_data.projector_rect[2] = 0; + light_data.projector_rect[3] = 0; + } - light_data.atlas_rect[0] = rect.position.x; - light_data.atlas_rect[1] = rect.position.y; - light_data.atlas_rect[2] = rect.size.width; - light_data.atlas_rect[3] = rect.size.height; + if (shadow_atlas && shadow_atlas->shadow_owners.has(li->self)) { + // fill in the shadow information - light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR); - light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base); + light_data.shadow_enabled = true; - if (type == RS::LIGHT_OMNI) { - light_data.atlas_rect[3] *= 0.5; //one paraboloid on top of another - Transform proj = (p_camera_inverse_transform * light_transform).inverse(); + if (type == RS::LIGHT_SPOT) { + light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0); + float shadow_texel_size = Math::tan(Math::deg2rad(spot_angle)) * radius * 2.0; + shadow_texel_size *= light_instance_get_shadow_texel_size(li->self, p_shadow_atlas); - RendererStorageRD::store_transform(proj, light_data.shadow_matrix); + light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size; - if (size > 0.0) { - light_data.soft_shadow_size = size; - } else { - light_data.soft_shadow_size = 0.0; - light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF - } + } else { //omni + light_data.shadow_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0; + float shadow_texel_size = light_instance_get_shadow_texel_size(li->self, p_shadow_atlas); + light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size * 2.0; // applied in -1 .. 1 space + } - } else if (type == RS::LIGHT_SPOT) { - Transform modelview = (p_camera_inverse_transform * light_transform).inverse(); - CameraMatrix bias; - bias.set_light_bias(); + light_data.transmittance_bias = storage->light_get_transmittance_bias(base); - CameraMatrix shadow_mtx = bias * light_instance_get_shadow_camera(li, 0) * modelview; - RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrix); + Rect2 rect = light_instance_get_shadow_atlas_rect(li->self, p_shadow_atlas); - if (size > 0.0) { - CameraMatrix cm = light_instance_get_shadow_camera(li, 0); - float half_np = cm.get_z_near() * Math::tan(Math::deg2rad(spot_angle)); - light_data.soft_shadow_size = (size * 0.5 / radius) / (half_np / cm.get_z_near()) * rect.size.width; - } else { - light_data.soft_shadow_size = 0.0; - light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF - } - } + light_data.atlas_rect[0] = rect.position.x; + light_data.atlas_rect[1] = rect.position.y; + light_data.atlas_rect[2] = rect.size.width; + light_data.atlas_rect[3] = rect.size.height; + + light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR); + light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base); + + if (type == RS::LIGHT_OMNI) { + light_data.atlas_rect[3] *= 0.5; //one paraboloid on top of another + Transform proj = (inverse_transform * light_transform).inverse(); + + RendererStorageRD::store_transform(proj, light_data.shadow_matrix); + + if (size > 0.0) { + light_data.soft_shadow_size = size; } else { - light_data.shadow_color_enabled[3] = 0; + light_data.soft_shadow_size = 0.0; + light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF } - light_instance_set_index(li, light_count); + } else if (type == RS::LIGHT_SPOT) { + Transform modelview = (inverse_transform * light_transform).inverse(); + CameraMatrix bias; + bias.set_light_bias(); - cluster.builder.add_light(type == RS::LIGHT_SPOT ? LightClusterBuilder::LIGHT_TYPE_SPOT : LightClusterBuilder::LIGHT_TYPE_OMNI, light_transform, radius, spot_angle); + CameraMatrix shadow_mtx = bias * li->shadow_transform[0].camera * modelview; + RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrix); - light_count++; - r_positional_light_count++; - } break; + if (size > 0.0) { + CameraMatrix cm = li->shadow_transform[0].camera; + float half_np = cm.get_z_near() * Math::tan(Math::deg2rad(spot_angle)); + light_data.soft_shadow_size = (size * 0.5 / radius) / (half_np / cm.get_z_near()) * rect.size.width; + } else { + light_data.soft_shadow_size = 0.0; + light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF + } + } + } else { + light_data.shadow_enabled = false; } - light_instance_set_render_pass(li, RSG::rasterizer->get_frame_number()); + li->light_index = index; + + current_cluster_builder->add_light(type == RS::LIGHT_SPOT ? ClusterBuilderRD::LIGHT_TYPE_SPOT : ClusterBuilderRD::LIGHT_TYPE_OMNI, light_transform, radius, spot_angle); - //update UBO for forward rendering, blit to texture for clustered + r_positional_light_count++; } - if (light_count) { - RD::get_singleton()->buffer_update(cluster.light_buffer, 0, sizeof(Cluster::LightData) * light_count, cluster.lights, true); + //update without barriers + if (cluster.omni_light_count) { + RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + + if (cluster.spot_light_count) { + RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } if (r_directional_light_count) { - RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights, true); + RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } @@ -6430,18 +6615,26 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const uv_xform.basis.scale(Vector3(2.0, 1.0, 2.0)); uv_xform.origin = Vector3(-1.0, 0.0, -1.0); - uint32_t decal_count = MIN((uint32_t)p_decals.size(), cluster.max_decals); - int idx = 0; + uint32_t decal_count = p_decals.size(); + + cluster.decal_count = 0; + for (uint32_t i = 0; i < decal_count; i++) { - RID di = p_decals[i]; - RID decal = decal_instance_get_base(di); + if (cluster.decal_count == cluster.max_decals) { + break; + } - Transform xform = decal_instance_get_transform(di); + DecalInstance *di = decal_instance_owner.getornull(p_decals[i]); + if (!di) { + continue; + } + RID decal = di->decal; - float fade = 1.0; + Transform xform = di->transform; + + real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; if (storage->decal_is_distance_fade_enabled(decal)) { - real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; float fade_begin = storage->decal_get_distance_fade_begin(decal); float fade_length = storage->decal_get_distance_fade_length(decal); @@ -6449,18 +6642,43 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const if (distance > fade_begin + fade_length) { continue; // do not use this decal, its invisible } + } + } + cluster.decal_sort[cluster.decal_count].instance = di; + cluster.decal_sort[cluster.decal_count].depth = distance; + cluster.decal_count++; + } + + if (cluster.decal_count > 0) { + SortArray<Cluster::InstanceSort<DecalInstance>> sort_array; + sort_array.sort(cluster.decal_sort, cluster.decal_count); + } + + for (uint32_t i = 0; i < cluster.decal_count; i++) { + DecalInstance *di = cluster.decal_sort[i].instance; + RID decal = di->decal; + + Transform xform = di->transform; + float fade = 1.0; + + if (storage->decal_is_distance_fade_enabled(decal)) { + real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; + float fade_begin = storage->decal_get_distance_fade_begin(decal); + float fade_length = storage->decal_get_distance_fade_length(decal); + + if (distance > fade_begin) { fade = 1.0 - (distance - fade_begin) / fade_length; } } - Cluster::DecalData &dd = cluster.decals[idx]; + Cluster::DecalData &dd = cluster.decals[i]; Vector3 decal_extents = storage->decal_get_extents(decal); Transform scale_xform; scale_xform.basis.scale(Vector3(decal_extents.x, decal_extents.y, decal_extents.z)); - Transform to_decal_xform = (p_camera_inverse_xform * decal_instance_get_transform(di) * scale_xform * uv_xform).affine_inverse(); + Transform to_decal_xform = (p_camera_inverse_xform * di->transform * scale_xform * uv_xform).affine_inverse(); RendererStorageRD::store_transform(to_decal_xform, dd.xform); Vector3 normal = xform.basis.get_axis(Vector3::AXIS_Y).normalized(); @@ -6545,19 +6763,18 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const dd.upper_fade = storage->decal_get_upper_fade(decal); dd.lower_fade = storage->decal_get_lower_fade(decal); - cluster.builder.add_decal(xform, decal_extents); - - idx++; + current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_DECAL, xform, decal_extents); } - if (idx > 0) { - RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * idx, cluster.decals, true); + if (cluster.decal_count > 0) { + RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } void RendererSceneRenderRD::_volumetric_fog_erase(RenderBuffers *rb) { ERR_FAIL_COND(!rb->volumetric_fog); + RD::get_singleton()->free(rb->volumetric_fog->prev_light_density_map); RD::get_singleton()->free(rb->volumetric_fog->light_density_map); RD::get_singleton()->free(rb->volumetric_fog->fog_map); @@ -6579,49 +6796,6 @@ void RendererSceneRenderRD::_volumetric_fog_erase(RenderBuffers *rb) { rb->volumetric_fog = nullptr; } -void RendererSceneRenderRD::_allocate_shadow_shrink_stages(RID p_base, int p_base_size, Vector<ShadowShrinkStage> &shrink_stages, uint32_t p_target_size) { - //create fog mipmaps - uint32_t fog_texture_size = p_target_size; - uint32_t base_texture_size = p_base_size; - - ShadowShrinkStage first; - first.size = base_texture_size; - first.texture = p_base; - shrink_stages.push_back(first); //put depth first in case we dont find smaller ones - - while (fog_texture_size < base_texture_size) { - base_texture_size = MAX(base_texture_size / 8, fog_texture_size); - - ShadowShrinkStage s; - s.size = base_texture_size; - - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; - tf.width = base_texture_size; - tf.height = base_texture_size; - tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; - - if (base_texture_size == fog_texture_size) { - s.filter_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - tf.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT; - } - - s.texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - - shrink_stages.push_back(s); - } -} - -void RendererSceneRenderRD::_clear_shadow_shrink_stages(Vector<ShadowShrinkStage> &shrink_stages) { - for (int i = 1; i < shrink_stages.size(); i++) { - RD::get_singleton()->free(shrink_stages[i].texture); - if (shrink_stages[i].filter_texture.is_valid()) { - RD::get_singleton()->free(shrink_stages[i].filter_texture); - } - } - shrink_stages.clear(); -} - void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_gi_probe_count) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); @@ -6644,6 +6818,8 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e return; } + RENDER_TIMESTAMP(">Volumetric Fog"); + if (env && env->volumetric_fog_enabled && !rb->volumetric_fog) { //required volumetric fog but not existing, create rb->volumetric_fog = memnew(VolumetricFog); @@ -6657,11 +6833,16 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e tf.height = target_height; tf.depth = volumetric_fog_depth; tf.texture_type = RD::TEXTURE_TYPE_3D; - tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; rb->volumetric_fog->light_density_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); - tf.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + + rb->volumetric_fog->prev_light_density_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->texture_clear(rb->volumetric_fog->prev_light_density_map, Color(0, 0, 0, 0), 0, 1, 0, 1); + + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; rb->volumetric_fog->fog_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); _render_buffers_uniform_set_changed(p_render_buffers); @@ -6678,162 +6859,6 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e rb->volumetric_fog->sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sky_shader.default_shader_rd, SKY_SET_FOG); } - //update directional shadow - - if (p_use_directional_shadows) { - if (directional_shadow.shrink_stages.is_empty()) { - if (rb->volumetric_fog->uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { - //invalidate uniform set, we will need a new one - RD::get_singleton()->free(rb->volumetric_fog->uniform_set); - rb->volumetric_fog->uniform_set = RID(); - } - _allocate_shadow_shrink_stages(directional_shadow.depth, directional_shadow.size, directional_shadow.shrink_stages, volumetric_fog_directional_shadow_shrink); - } - - if (directional_shadow.shrink_stages.size() > 1) { - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - for (int i = 1; i < directional_shadow.shrink_stages.size(); i++) { - int32_t src_size = directional_shadow.shrink_stages[i - 1].size; - int32_t dst_size = directional_shadow.shrink_stages[i].size; - Rect2i r(0, 0, src_size, src_size); - int32_t shrink_limit = 8 / (src_size / dst_size); - - storage->get_effects()->reduce_shadow(directional_shadow.shrink_stages[i - 1].texture, directional_shadow.shrink_stages[i].texture, Size2i(src_size, src_size), r, shrink_limit, compute_list); - RD::get_singleton()->compute_list_add_barrier(compute_list); - if (env->volumetric_fog_shadow_filter != RS::ENV_VOLUMETRIC_FOG_SHADOW_FILTER_DISABLED && directional_shadow.shrink_stages[i].filter_texture.is_valid()) { - Rect2i rf(0, 0, dst_size, dst_size); - storage->get_effects()->filter_shadow(directional_shadow.shrink_stages[i].texture, directional_shadow.shrink_stages[i].filter_texture, Size2i(dst_size, dst_size), rf, env->volumetric_fog_shadow_filter, compute_list); - } - } - RD::get_singleton()->compute_list_end(); - } - } - - ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); - - if (shadow_atlas) { - //shrink shadows that need to be shrunk - - bool force_shrink_shadows = false; - - if (shadow_atlas->shrink_stages.is_empty()) { - if (rb->volumetric_fog->uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { - //invalidate uniform set, we will need a new one - RD::get_singleton()->free(rb->volumetric_fog->uniform_set); - rb->volumetric_fog->uniform_set = RID(); - } - _allocate_shadow_shrink_stages(shadow_atlas->depth, shadow_atlas->size, shadow_atlas->shrink_stages, volumetric_fog_positional_shadow_shrink); - force_shrink_shadows = true; - } - - if (rb->volumetric_fog->last_shadow_filter != env->volumetric_fog_shadow_filter) { - //if shadow filter changed, invalidate caches - rb->volumetric_fog->last_shadow_filter = env->volumetric_fog_shadow_filter; - force_shrink_shadows = true; - } - - cluster.lights_shadow_rect_cache_count = 0; - - for (int i = 0; i < p_positional_light_count; i++) { - if (cluster.lights[i].shadow_color_enabled[3] > 127) { - RID li = cluster.lights_instances[i]; - - ERR_CONTINUE(!shadow_atlas->shadow_owners.has(li)); - - uint32_t key = shadow_atlas->shadow_owners[li]; - - uint32_t quadrant = (key >> ShadowAtlas::QUADRANT_SHIFT) & 0x3; - uint32_t shadow = key & ShadowAtlas::SHADOW_INDEX_MASK; - - ERR_CONTINUE((int)shadow >= shadow_atlas->quadrants[quadrant].shadows.size()); - - ShadowAtlas::Quadrant::Shadow &s = shadow_atlas->quadrants[quadrant].shadows.write[shadow]; - - if (!force_shrink_shadows && s.fog_version == s.version) { - continue; //do not update, no need - } - - s.fog_version = s.version; - - uint32_t quadrant_size = shadow_atlas->size >> 1; - - Rect2i atlas_rect; - - atlas_rect.position.x = (quadrant & 1) * quadrant_size; - atlas_rect.position.y = (quadrant >> 1) * quadrant_size; - - uint32_t shadow_size = (quadrant_size / shadow_atlas->quadrants[quadrant].subdivision); - atlas_rect.position.x += (shadow % shadow_atlas->quadrants[quadrant].subdivision) * shadow_size; - atlas_rect.position.y += (shadow / shadow_atlas->quadrants[quadrant].subdivision) * shadow_size; - - atlas_rect.size.x = shadow_size; - atlas_rect.size.y = shadow_size; - - cluster.lights_shadow_rect_cache[cluster.lights_shadow_rect_cache_count] = atlas_rect; - - cluster.lights_shadow_rect_cache_count++; - - if (cluster.lights_shadow_rect_cache_count == cluster.max_lights) { - break; //light limit reached - } - } - } - - if (cluster.lights_shadow_rect_cache_count > 0) { - //there are shadows to be shrunk, try to do them in parallel - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - for (int i = 1; i < shadow_atlas->shrink_stages.size(); i++) { - int32_t base_size = shadow_atlas->shrink_stages[0].size; - int32_t src_size = shadow_atlas->shrink_stages[i - 1].size; - int32_t dst_size = shadow_atlas->shrink_stages[i].size; - - uint32_t rect_divisor = base_size / src_size; - - int32_t shrink_limit = 8 / (src_size / dst_size); - - //shrink in parallel for more performance - for (uint32_t j = 0; j < cluster.lights_shadow_rect_cache_count; j++) { - Rect2i src_rect = cluster.lights_shadow_rect_cache[j]; - - src_rect.position /= rect_divisor; - src_rect.size /= rect_divisor; - - storage->get_effects()->reduce_shadow(shadow_atlas->shrink_stages[i - 1].texture, shadow_atlas->shrink_stages[i].texture, Size2i(src_size, src_size), src_rect, shrink_limit, compute_list); - } - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - if (env->volumetric_fog_shadow_filter != RS::ENV_VOLUMETRIC_FOG_SHADOW_FILTER_DISABLED && shadow_atlas->shrink_stages[i].filter_texture.is_valid()) { - uint32_t filter_divisor = base_size / dst_size; - - //filter in parallel for more performance - for (uint32_t j = 0; j < cluster.lights_shadow_rect_cache_count; j++) { - Rect2i dst_rect = cluster.lights_shadow_rect_cache[j]; - - dst_rect.position /= filter_divisor; - dst_rect.size /= filter_divisor; - - storage->get_effects()->filter_shadow(shadow_atlas->shrink_stages[i].texture, shadow_atlas->shrink_stages[i].filter_texture, Size2i(dst_size, dst_size), dst_rect, env->volumetric_fog_shadow_filter, compute_list, true, false); - } - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - for (uint32_t j = 0; j < cluster.lights_shadow_rect_cache_count; j++) { - Rect2i dst_rect = cluster.lights_shadow_rect_cache[j]; - - dst_rect.position /= filter_divisor; - dst_rect.size /= filter_divisor; - - storage->get_effects()->filter_shadow(shadow_atlas->shrink_stages[i].texture, shadow_atlas->shrink_stages[i].filter_texture, Size2i(dst_size, dst_size), dst_rect, env->volumetric_fog_shadow_filter, compute_list, false, true); - } - } - } - - RD::get_singleton()->compute_list_end(); - } - } - //update volumetric fog if (rb->volumetric_fog->uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { @@ -6845,10 +6870,11 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.binding = 1; - if (shadow_atlas == nullptr || shadow_atlas->shrink_stages.size() == 0) { + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + if (shadow_atlas == nullptr || shadow_atlas->depth.is_null()) { u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK)); } else { - u.ids.push_back(shadow_atlas->shrink_stages[shadow_atlas->shrink_stages.size() - 1].texture); + u.ids.push_back(shadow_atlas->depth); } uniforms.push_back(u); @@ -6858,10 +6884,10 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.binding = 2; - if (directional_shadow.shrink_stages.size() == 0) { - u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK)); + if (directional_shadow.depth.is_valid()) { + u.ids.push_back(directional_shadow.depth); } else { - u.ids.push_back(directional_shadow.shrink_stages[directional_shadow.shrink_stages.size() - 1].texture); + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK)); } uniforms.push_back(u); } @@ -6870,23 +6896,22 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 3; - u.ids.push_back(get_positional_light_buffer()); + u.ids.push_back(get_omni_light_buffer()); uniforms.push_back(u); } - { RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 4; - u.ids.push_back(get_directional_light_buffer()); + u.ids.push_back(get_spot_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.binding = 5; - u.ids.push_back(get_cluster_builder_texture()); + u.ids.push_back(get_directional_light_buffer()); uniforms.push_back(u); } @@ -6894,7 +6919,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 6; - u.ids.push_back(get_cluster_builder_indices_buffer()); + u.ids.push_back(rb->cluster_builder->get_cluster_buffer()); uniforms.push_back(u); } @@ -6954,6 +6979,20 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); uniforms.push_back(u); } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 14; + u.ids.push_back(volumetric_fog.params_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 15; + u.ids.push_back(rb->volumetric_fog->prev_light_density_map); + uniforms.push_back(u); + } rb->volumetric_fog->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, 0), 0); @@ -6999,7 +7038,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e rb->volumetric_fog->length = env->volumetric_fog_length; rb->volumetric_fog->spread = env->volumetric_fog_detail_spread; - VolumetricFogShader::PushConstant push_constant; + VolumetricFogShader::ParamsUBO params; Vector2 frustum_near_size = p_cam_projection.get_viewport_half_extents(); Vector2 frustum_far_size = p_cam_projection.get_far_plane_half_extents(); @@ -7015,51 +7054,78 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e fog_near_size = Vector2(); } - push_constant.fog_frustum_size_begin[0] = fog_near_size.x; - push_constant.fog_frustum_size_begin[1] = fog_near_size.y; + params.fog_frustum_size_begin[0] = fog_near_size.x; + params.fog_frustum_size_begin[1] = fog_near_size.y; - push_constant.fog_frustum_size_end[0] = fog_far_size.x; - push_constant.fog_frustum_size_end[1] = fog_far_size.y; + params.fog_frustum_size_end[0] = fog_far_size.x; + params.fog_frustum_size_end[1] = fog_far_size.y; - push_constant.z_near = z_near; - push_constant.z_far = z_far; + params.z_near = z_near; + params.z_far = z_far; - push_constant.fog_frustum_end = fog_end; + params.fog_frustum_end = fog_end; - push_constant.fog_volume_size[0] = rb->volumetric_fog->width; - push_constant.fog_volume_size[1] = rb->volumetric_fog->height; - push_constant.fog_volume_size[2] = rb->volumetric_fog->depth; + params.fog_volume_size[0] = rb->volumetric_fog->width; + params.fog_volume_size[1] = rb->volumetric_fog->height; + params.fog_volume_size[2] = rb->volumetric_fog->depth; - push_constant.directional_light_count = p_directional_light_count; + params.directional_light_count = p_directional_light_count; Color light = env->volumetric_fog_light.to_linear(); - push_constant.light_energy[0] = light.r * env->volumetric_fog_light_energy; - push_constant.light_energy[1] = light.g * env->volumetric_fog_light_energy; - push_constant.light_energy[2] = light.b * env->volumetric_fog_light_energy; - push_constant.base_density = env->volumetric_fog_density; + params.light_energy[0] = light.r * env->volumetric_fog_light_energy; + params.light_energy[1] = light.g * env->volumetric_fog_light_energy; + params.light_energy[2] = light.b * env->volumetric_fog_light_energy; + params.base_density = env->volumetric_fog_density; + + params.detail_spread = env->volumetric_fog_detail_spread; + params.gi_inject = env->volumetric_fog_gi_inject; + + params.cam_rotation[0] = p_cam_transform.basis[0][0]; + params.cam_rotation[1] = p_cam_transform.basis[1][0]; + params.cam_rotation[2] = p_cam_transform.basis[2][0]; + params.cam_rotation[3] = 0; + params.cam_rotation[4] = p_cam_transform.basis[0][1]; + params.cam_rotation[5] = p_cam_transform.basis[1][1]; + params.cam_rotation[6] = p_cam_transform.basis[2][1]; + params.cam_rotation[7] = 0; + params.cam_rotation[8] = p_cam_transform.basis[0][2]; + params.cam_rotation[9] = p_cam_transform.basis[1][2]; + params.cam_rotation[10] = p_cam_transform.basis[2][2]; + params.cam_rotation[11] = 0; + params.filter_axis = 0; + params.max_gi_probes = env->volumetric_fog_gi_inject > 0.001 ? p_gi_probe_count : 0; + params.temporal_frame = RSG::rasterizer->get_frame_number() % VolumetricFog::MAX_TEMPORAL_FRAMES; + + Transform to_prev_cam_view = rb->volumetric_fog->prev_cam_transform.affine_inverse() * p_cam_transform; + storage->store_transform(to_prev_cam_view, params.to_prev_view); + + params.use_temporal_reprojection = env->volumetric_fog_temporal_reprojection; + params.temporal_blend = env->volumetric_fog_temporal_reprojection_amount; - push_constant.detail_spread = env->volumetric_fog_detail_spread; - push_constant.gi_inject = env->volumetric_fog_gi_inject; + { + uint32_t cluster_size = rb->cluster_builder->get_cluster_size(); + params.cluster_shift = get_shift_from_power_of_2(cluster_size); - push_constant.cam_rotation[0] = p_cam_transform.basis[0][0]; - push_constant.cam_rotation[1] = p_cam_transform.basis[1][0]; - push_constant.cam_rotation[2] = p_cam_transform.basis[2][0]; - push_constant.cam_rotation[3] = 0; - push_constant.cam_rotation[4] = p_cam_transform.basis[0][1]; - push_constant.cam_rotation[5] = p_cam_transform.basis[1][1]; - push_constant.cam_rotation[6] = p_cam_transform.basis[2][1]; - push_constant.cam_rotation[7] = 0; - push_constant.cam_rotation[8] = p_cam_transform.basis[0][2]; - push_constant.cam_rotation[9] = p_cam_transform.basis[1][2]; - push_constant.cam_rotation[10] = p_cam_transform.basis[2][2]; - push_constant.cam_rotation[11] = 0; - push_constant.filter_axis = 0; - push_constant.max_gi_probes = env->volumetric_fog_gi_inject > 0.001 ? p_gi_probe_count : 0; + uint32_t cluster_screen_width = (rb->width - 1) / cluster_size + 1; + uint32_t cluster_screen_height = (rb->height - 1) / cluster_size + 1; + params.cluster_type_size = cluster_screen_width * cluster_screen_height * (32 + 32); + params.cluster_width = cluster_screen_width; + params.max_cluster_element_count_div_32 = max_cluster_elements / 32; + + params.screen_size[0] = rb->width; + params.screen_size[1] = rb->height; + } /* Vector2 dssize = directional_shadow_get_size(); push_constant.directional_shadow_pixel_size[0] = 1.0 / dssize.x; push_constant.directional_shadow_pixel_size[1] = 1.0 / dssize.y; */ + + RD::get_singleton()->draw_command_begin_label("Render Volumetric Fog"); + + RENDER_TIMESTAMP("Render Fog"); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), ¶ms, RD::BARRIER_MASK_COMPUTE); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); bool use_filter = volumetric_fog_filter_active; @@ -7067,93 +7133,212 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[using_sdfgi ? VOLUMETRIC_FOG_SHADER_DENSITY_WITH_SDFGI : VOLUMETRIC_FOG_SHADER_DENSITY]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); + if (using_sdfgi) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); } - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); - RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); + + RD::get_singleton()->compute_list_end(); + + RD::get_singleton()->texture_copy(rb->volumetric_fog->light_density_map, rb->volumetric_fog->prev_light_density_map, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth), 0, 0, 0, 0); + + compute_list = RD::get_singleton()->compute_list_begin(); if (use_filter) { + RD::get_singleton()->draw_command_begin_label("Filter Fog"); + + RENDER_TIMESTAMP("Filter Fog"); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); - RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->compute_list_end(); + //need restart for buffer update - push_constant.filter_axis = 1; + params.filter_axis = 1; + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), ¶ms); + compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set2, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); + if (using_sdfgi) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); + } + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); } + RENDER_TIMESTAMP("Integrate Fog"); + RD::get_singleton()->draw_command_begin_label("Integrate Fog"); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FOG]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VolumetricFogShader::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER); + + RENDER_TIMESTAMP("<Volumetric Fog"); + RD::get_singleton()->draw_command_end_label(); + + rb->volumetric_fog->prev_cam_transform = p_cam_transform; } -void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) { - Color clear_color; - if (p_render_buffers.is_valid()) { - RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); - ERR_FAIL_COND(!rb); - clear_color = storage->render_target_get_clear_request_color(rb->render_target); - } else { - clear_color = storage->get_default_clear_color(); +uint32_t RendererSceneRenderRD::_get_render_state_directional_light_count() const { + return render_state.directional_light_count; +} + +bool RendererSceneRenderRD::_needs_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + if (rb->sdfgi != nullptr) { + return true; + } } + return false; +} - //assign render indices to giprobes - for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { - GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probes[i]); - if (giprobe_inst) { - giprobe_inst->render_index = i; +void RendererSceneRenderRD::_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + _sdfgi_update_probes(render_state.render_buffers, render_state.environment); } } +} - const PagedArray<RID> *lights = &p_lights; - const PagedArray<RID> *reflections = &p_reflection_probes; - const PagedArray<RID> *gi_probes = &p_gi_probes; +void RendererSceneRenderRD::_pre_resolve_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + RD::get_singleton()->compute_list_end(); + } + } +} - PagedArray<RID> empty; +void RendererSceneRenderRD::_pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer) { + // Render shadows while GI is rendering, due to how barriers are handled, this should happen at the same time - if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { - lights = ∅ - reflections = ∅ - gi_probes = ∅ + if (render_state.render_buffers.is_valid() && p_use_gi) { + _sdfgi_store_probes(render_state.render_buffers); + } + + render_state.cube_shadows.clear(); + render_state.shadows.clear(); + render_state.directional_shadows.clear(); + + Plane camera_plane(render_state.cam_transform.origin, -render_state.cam_transform.basis.get_axis(Vector3::AXIS_Z)); + float lod_distance_multiplier = render_state.cam_projection.get_lod_multiplier(); + + { + for (int i = 0; i < render_state.render_shadow_count; i++) { + LightInstance *li = light_instance_owner.getornull(render_state.render_shadows[i].light); + + if (storage->light_get_type(li->light) == RS::LIGHT_DIRECTIONAL) { + render_state.directional_shadows.push_back(i); + } else if (storage->light_get_type(li->light) == RS::LIGHT_OMNI && storage->light_omni_get_shadow_mode(li->light) == RS::LIGHT_OMNI_SHADOW_CUBE) { + render_state.cube_shadows.push_back(i); + } else { + render_state.shadows.push_back(i); + } + } + + //cube shadows are rendered in their own way + for (uint32_t i = 0; i < render_state.cube_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.cube_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.cube_shadows[i]].pass, render_state.render_shadows[render_state.cube_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, true, true, true); + } + + if (render_state.directional_shadows.size()) { + //open the pass for directional shadows + _update_directional_shadow_atlas(); + RD::get_singleton()->draw_list_begin(directional_shadow.fb, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_end(); + } + } + + // Render GI + + bool render_shadows = render_state.directional_shadows.size() || render_state.shadows.size(); + bool render_gi = render_state.render_buffers.is_valid() && p_use_gi; + + if (render_shadows && render_gi) { + RENDER_TIMESTAMP("Render GI + Render Shadows (parallel)"); + } else if (render_shadows) { + RENDER_TIMESTAMP("Render Shadows"); + } else if (render_gi) { + RENDER_TIMESTAMP("Render GI"); + } + + //prepare shadow rendering + if (render_shadows) { + _render_shadow_begin(); + + //render directional shadows + for (uint32_t i = 0; i < render_state.directional_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.directional_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.directional_shadows[i]].pass, render_state.render_shadows[render_state.directional_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, false, i == render_state.directional_shadows.size() - 1, false); + } + //render positional shadows + for (uint32_t i = 0; i < render_state.shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.shadows[i]].pass, render_state.render_shadows[render_state.shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, i == 0, i == render_state.shadows.size() - 1, true); + } + + _render_shadow_process(); + } + + //start GI + if (render_gi) { + _process_gi(render_state.render_buffers, p_normal_roughness_buffer, p_gi_probe_buffer, render_state.environment, render_state.cam_projection, render_state.cam_transform, *render_state.gi_probes); + } + + //Do shadow rendering (in parallel with GI) + if (render_shadows) { + _render_shadow_end(RD::BARRIER_MASK_NO_BARRIER); + } + + if (render_gi) { + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //use a later barrier + } + + if (render_state.render_buffers.is_valid()) { + if (p_use_ssao) { + _process_ssao(render_state.render_buffers, render_state.environment, p_normal_roughness_buffer, render_state.cam_projection); + } } - cluster.builder.begin(p_cam_transform.affine_inverse(), p_cam_projection); //prepare cluster + //full barrier here, we need raster, transfer and compute and it depends from the previous work + RD::get_singleton()->barrier(RD::BARRIER_MASK_ALL, RD::BARRIER_MASK_ALL); + + if (current_cluster_builder) { + current_cluster_builder->begin(render_state.cam_transform, render_state.cam_projection, !render_state.reflection_probe.is_valid()); + } bool using_shadows = true; - if (p_reflection_probe.is_valid()) { - if (!storage->reflection_probe_renders_shadows(reflection_probe_instance_get_probe(p_reflection_probe))) { + if (render_state.reflection_probe.is_valid()) { + if (!storage->reflection_probe_renders_shadows(reflection_probe_instance_get_probe(render_state.reflection_probe))) { using_shadows = false; } } else { //do not render reflections when rendering a reflection probe - _setup_reflections(*reflections, p_cam_transform.affine_inverse(), p_environment); + _setup_reflections(*render_state.reflection_probes, render_state.cam_transform.affine_inverse(), render_state.environment); } uint32_t directional_light_count = 0; uint32_t positional_light_count = 0; - _setup_lights(*lights, p_cam_transform.affine_inverse(), p_shadow_atlas, using_shadows, directional_light_count, positional_light_count); - _setup_decals(p_decals, p_cam_transform.affine_inverse()); - cluster.builder.bake_cluster(); //bake to cluster + _setup_lights(*render_state.lights, render_state.cam_transform, render_state.shadow_atlas, using_shadows, directional_light_count, positional_light_count); + _setup_decals(*render_state.decals, render_state.cam_transform.affine_inverse()); - uint32_t gi_probe_count = 0; - _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count); + render_state.directional_light_count = directional_light_count; - if (p_render_buffers.is_valid()) { + if (current_cluster_builder) { + current_cluster_builder->bake_cluster(); + } + + if (render_state.render_buffers.is_valid()) { bool directional_shadows = false; for (uint32_t i = 0; i < directional_light_count; i++) { if (cluster.directional_lights[i].shadow_enabled) { @@ -7161,12 +7346,126 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & break; } } - _update_volumetric_fog(p_render_buffers, p_environment, p_cam_projection, p_cam_transform, p_shadow_atlas, directional_light_count, directional_shadows, positional_light_count, gi_probe_count); + _update_volumetric_fog(render_state.render_buffers, render_state.environment, render_state.cam_projection, render_state.cam_transform, render_state.shadow_atlas, directional_light_count, directional_shadows, positional_light_count, render_state.gi_probe_count); + } +} + +void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data) { + //assign render data + { + render_state.render_buffers = p_render_buffers; + render_state.cam_transform = p_cam_transform; + render_state.cam_projection = p_cam_projection; + render_state.cam_ortogonal = p_cam_projection.is_orthogonal(); + render_state.instances = &p_instances; + render_state.lights = &p_lights; + render_state.reflection_probes = &p_reflection_probes; + render_state.gi_probes = &p_gi_probes; + render_state.decals = &p_decals; + render_state.lightmaps = &p_lightmaps; + render_state.environment = p_environment; + render_state.camera_effects = p_camera_effects; + render_state.shadow_atlas = p_shadow_atlas; + render_state.reflection_atlas = p_reflection_atlas; + render_state.reflection_probe = p_reflection_probe; + render_state.reflection_probe_pass = p_reflection_probe_pass; + render_state.screen_lod_threshold = p_screen_lod_threshold; + + render_state.render_shadows = p_render_shadows; + render_state.render_shadow_count = p_render_shadow_count; + render_state.render_sdfgi_regions = p_render_sdfgi_regions; + render_state.render_sdfgi_region_count = p_render_sdfgi_region_count; + render_state.sdfgi_update_data = p_sdfgi_update_data; + } + + PagedArray<RID> empty; + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { + render_state.lights = ∅ + render_state.reflection_probes = ∅ + render_state.gi_probes = ∅ + } + + //sdfgi first + if (p_render_buffers.is_valid()) { + for (int i = 0; i < render_state.render_sdfgi_region_count; i++) { + _render_sdfgi_region(p_render_buffers, render_state.render_sdfgi_regions[i].region, render_state.render_sdfgi_regions[i].instances); + } + if (render_state.sdfgi_update_data->update_static) { + _render_sdfgi_static_lights(p_render_buffers, render_state.sdfgi_update_data->static_cascade_count, p_sdfgi_update_data->static_cascade_indices, render_state.sdfgi_update_data->static_positional_lights); + } + } + + Color clear_color; + if (p_render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + clear_color = storage->render_target_get_clear_request_color(rb->render_target); + } else { + clear_color = storage->get_default_clear_color(); + } + + //assign render indices to giprobes + for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { + GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probes[i]); + if (giprobe_inst) { + giprobe_inst->render_index = i; + } } - _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); + if (render_buffers_owner.owns(render_state.render_buffers)) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + current_cluster_builder = rb->cluster_builder; + } else if (reflection_probe_instance_owner.owns(render_state.reflection_probe)) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(render_state.reflection_probe); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas); + if (!ra) { + ERR_PRINT("reflection probe has no reflection atlas! Bug?"); + current_cluster_builder = nullptr; + } else { + current_cluster_builder = ra->cluster_builder; + } + } else { + ERR_PRINT("No cluster builder, bug"); //should never happen, will crash + current_cluster_builder = nullptr; + } if (p_render_buffers.is_valid()) { + _pre_process_gi(p_render_buffers, p_cam_transform); + } + + render_state.gi_probe_count = 0; + if (render_state.render_buffers.is_valid()) { + _setup_giprobes(render_state.render_buffers, render_state.cam_transform, *render_state.gi_probes, render_state.gi_probe_count); + _sdfgi_update_light(render_state.render_buffers, render_state.environment); + } + + render_state.depth_prepass_used = false; + //calls _pre_opaque_render between depth pre-pass and opaque pass + _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, *render_state.gi_probes, p_lightmaps, p_environment, current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); + + if (p_render_buffers.is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES) { + ClusterBuilderRD::ElementType elem_type = ClusterBuilderRD::ELEMENT_TYPE_MAX; + switch (debug_draw) { + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_OMNI_LIGHT; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_SPOT_LIGHT; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_DECAL; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_REFLECTION_PROBE; + break; + default: { + } + } + current_cluster_builder->debug(elem_type); + } + RENDER_TIMESTAMP("Tonemap"); _render_buffers_post_process_and_tonemap(p_render_buffers, p_environment, p_camera_effects, p_cam_projection); @@ -7177,27 +7476,26 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & } } -void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<InstanceBase *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { +void RendererSceneRenderRD::_render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_open_pass, bool p_close_pass, bool p_clear_region) { LightInstance *light_instance = light_instance_owner.getornull(p_light); ERR_FAIL_COND(!light_instance); Rect2i atlas_rect; - RID atlas_texture; + uint32_t atlas_size; + RID atlas_fb; bool using_dual_paraboloid = false; bool using_dual_paraboloid_flip = false; - float znear = 0; - float zfar = 0; RID render_fb; RID render_texture; - float bias = 0; - float normal_bias = 0; + float zfar; bool use_pancake = false; - bool use_linear_depth = false; bool render_cubemap = false; bool finalize_cubemap = false; + bool flip_y = false; + CameraMatrix light_projection; Transform light_transform; @@ -7230,7 +7528,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p atlas_rect.position.x += atlas_rect.size.width; atlas_rect.position.y += atlas_rect.size.height; } - } else if (storage->light_directional_get_shadow_mode(light_instance->light) == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS) { atlas_rect.size.height /= 2; @@ -7245,15 +7542,11 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p light_instance->shadow_transform[p_pass].atlas_rect.position /= directional_shadow.size; light_instance->shadow_transform[p_pass].atlas_rect.size /= directional_shadow.size; - float bias_mult = light_instance->shadow_transform[p_pass].bias_scale; zfar = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_RANGE); - bias = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_SHADOW_BIAS) * bias_mult; - normal_bias = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * bias_mult; - ShadowMap *shadow_map = _get_shadow_map(atlas_rect.size); - render_fb = shadow_map->fb; - render_texture = shadow_map->depth; - atlas_texture = directional_shadow.depth; + render_fb = directional_shadow.fb; + render_texture = RID(); + flip_y = true; } else { //set from shadow atlas @@ -7262,6 +7555,8 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p ERR_FAIL_COND(!shadow_atlas); ERR_FAIL_COND(!shadow_atlas->shadow_owners.has(p_light)); + _update_shadow_atlas(shadow_atlas); + uint32_t key = shadow_atlas->shadow_owners[p_light]; uint32_t quadrant = (key >> ShadowAtlas::QUADRANT_SHIFT) & 0x3; @@ -7280,11 +7575,8 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p atlas_rect.size.width = shadow_size; atlas_rect.size.height = shadow_size; - atlas_texture = shadow_atlas->depth; zfar = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_RANGE); - bias = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_SHADOW_BIAS); - normal_bias = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS); if (storage->light_get_type(light_instance->light) == RS::LIGHT_OMNI) { if (storage->light_omni_get_shadow_mode(light_instance->light) == RS::LIGHT_OMNI_SHADOW_CUBE) { @@ -7293,10 +7585,17 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p render_fb = cubemap->side_fb[p_pass]; render_texture = cubemap->cubemap; - light_projection = light_instance->shadow_transform[0].camera; - light_transform = light_instance->shadow_transform[0].transform; + light_projection = light_instance->shadow_transform[p_pass].camera; + light_transform = light_instance->shadow_transform[p_pass].transform; render_cubemap = true; finalize_cubemap = p_pass == 5; + atlas_fb = shadow_atlas->fb; + + atlas_size = shadow_atlas->size; + + if (p_pass == 0) { + _render_shadow_begin(); + } } else { light_projection = light_instance->shadow_transform[0].camera; @@ -7307,57 +7606,52 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p using_dual_paraboloid = true; using_dual_paraboloid_flip = p_pass == 1; - - ShadowMap *shadow_map = _get_shadow_map(atlas_rect.size); - render_fb = shadow_map->fb; - render_texture = shadow_map->depth; + render_fb = shadow_atlas->fb; + flip_y = true; } } else if (storage->light_get_type(light_instance->light) == RS::LIGHT_SPOT) { light_projection = light_instance->shadow_transform[0].camera; light_transform = light_instance->shadow_transform[0].transform; - ShadowMap *shadow_map = _get_shadow_map(atlas_rect.size); - render_fb = shadow_map->fb; - render_texture = shadow_map->depth; + render_fb = shadow_atlas->fb; - znear = light_instance->shadow_transform[0].camera.get_z_near(); - use_linear_depth = true; + flip_y = true; } } if (render_cubemap) { //rendering to cubemap - _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, Rect2(), false, true, true, true); if (finalize_cubemap) { + _render_shadow_process(); + _render_shadow_end(); //reblit - atlas_rect.size.height /= 2; - storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_texture, atlas_rect, light_projection.get_z_near(), light_projection.get_z_far(), 0.0, false); - atlas_rect.position.y += atlas_rect.size.height; - storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_texture, atlas_rect, light_projection.get_z_near(), light_projection.get_z_far(), 0.0, true); - } - } else { - //render shadow - - _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, bias, normal_bias, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + Rect2 atlas_rect_norm = atlas_rect; + atlas_rect_norm.position.x /= float(atlas_size); + atlas_rect_norm.position.y /= float(atlas_size); + atlas_rect_norm.size.x /= float(atlas_size); + atlas_rect_norm.size.y /= float(atlas_size); + atlas_rect_norm.size.height /= 2; + storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), false); + atlas_rect_norm.position.y += atlas_rect_norm.size.height; + storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), true); - //copy to atlas - if (use_linear_depth) { - storage->get_effects()->copy_depth_to_rect_and_linearize(render_texture, atlas_texture, atlas_rect, true, znear, zfar); - } else { - storage->get_effects()->copy_depth_to_rect(render_texture, atlas_texture, atlas_rect, true); + //restore transform so it can be properly used + light_instance_set_shadow_transform(p_light, CameraMatrix(), light_instance->transform, zfar, 0, 0, 0); } - //does not work from depth to color - //RD::get_singleton()->texture_copy(render_texture, atlas_texture, Vector3(0, 0, 0), Vector3(atlas_rect.position.x, atlas_rect.position.y, 0), Vector3(atlas_rect.size.x, atlas_rect.size.y, 1), 0, 0, 0, 0, true); + } else { + //render shadow + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, atlas_rect, flip_y, p_clear_region, p_open_pass, p_close_pass); } } -void RendererSceneRenderRD::render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { +void RendererSceneRenderRD::render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { _render_material(p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, p_framebuffer, p_region); } -void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<InstanceBase *> &p_instances) { +void RendererSceneRenderRD::_render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) { //print_line("rendering region " + itos(p_region)); RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); @@ -7373,16 +7667,18 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con if (cascade_prev != cascade) { //initialize render - RD::get_singleton()->texture_clear(rb->sdfgi->render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->render_emission, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1, true); + RD::get_singleton()->texture_clear(rb->sdfgi->render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_emission, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1); } //print_line("rendering cascade " + itos(p_region) + " objects: " + itos(p_cull_count) + " bounds: " + bounds + " from: " + from + " size: " + size + " cell size: " + rtos(rb->sdfgi->cascades[cascade].cell_size)); _render_sdfgi(p_render_buffers, from, size, bounds, p_instances, rb->sdfgi->render_albedo, rb->sdfgi->render_emission, rb->sdfgi->render_emission_aniso, rb->sdfgi->render_geom_facing); if (cascade_next != cascade) { + RD::get_singleton()->draw_command_begin_label("SDFGI Pre-Process Cascade"); + RENDER_TIMESTAMP(">SDFGI Update SDF"); //done rendering! must update SDF //clear dispatch indirect data @@ -7405,6 +7701,9 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.scroll[1] = 0; push_constant.scroll[2] = 0; } + + rb->sdfgi->cascades[cascade].all_dynamic_lights_dirty = true; + push_constant.grid_size = rb->sdfgi->cascade_size; push_constant.cascade = cascade; @@ -7429,7 +7728,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con groups.z = rb->sdfgi->cascade_size - ABS(dirty.z); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z); //no barrier, continue together @@ -7471,7 +7770,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7479,7 +7778,24 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + if (rb->sdfgi->uses_multibounce) { + //multibounce requires this to be stored so direct light can read from it + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_STORE]); + + //convert to octahedral to store + ipush_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; + ipush_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); + } } //ok finally barrier @@ -7488,7 +7804,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con //clear dispatch indirect data uint32_t dispatch_indirct_data[4] = { 0, 0, 0, 0 }; - RD::get_singleton()->buffer_update(rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data, true); + RD::get_singleton()->buffer_update(rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -7502,7 +7818,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_half_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); RD::get_singleton()->compute_list_add_barrier(compute_list); //must start with regular jumpflood @@ -7522,7 +7838,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; @@ -7540,7 +7856,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; } @@ -7552,7 +7868,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_upscale_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); //run one pass of fullsize jumpflood to fix up half size arctifacts @@ -7562,7 +7878,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[rb->sdfgi->upscale_jfa_uniform_set_index], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); } else { @@ -7572,7 +7888,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7589,7 +7905,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; @@ -7607,7 +7923,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; } @@ -7654,14 +7970,14 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_STORE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].sdf_store_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_end(); //clear these textures, as they will have previous garbage on next draw - RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); #if 0 Vector<uint8_t> data = RD::get_singleton()->texture_get_data(rb->sdfgi->cascades[cascade].sdf, 0); @@ -7691,10 +8007,11 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con #endif RENDER_TIMESTAMP("<SDFGI Update SDF"); + RD::get_singleton()->draw_command_end_label(); } } -void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<InstanceBase *> &p_instances) { +void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances) { ERR_FAIL_COND(!storage->particles_collision_is_heightfield(p_collider)); Vector3 extents = storage->particles_collision_get_extents(p_collider) * p_transform.basis.get_scale(); CameraMatrix cm; @@ -7711,32 +8028,17 @@ void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, _render_particle_collider_heightfield(fb, cam_xform, cm, p_instances); } -void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { +void RendererSceneRenderRD::_render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); ERR_FAIL_COND(!rb->sdfgi); - _sdfgi_update_cascades(p_render_buffers); //need cascades updated for this - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_STATIC]); - - SDGIShader::DirectLightPushConstant dl_push_constant; + RD::get_singleton()->draw_command_begin_label("SDFGI Render Static Lighs"); - dl_push_constant.grid_size[0] = rb->sdfgi->cascade_size; - dl_push_constant.grid_size[1] = rb->sdfgi->cascade_size; - dl_push_constant.grid_size[2] = rb->sdfgi->cascade_size; - dl_push_constant.max_cascades = rb->sdfgi->cascades.size(); - dl_push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; - dl_push_constant.multibounce = false; // this is static light, do not multibounce yet - dl_push_constant.y_mult = rb->sdfgi->y_mult; - - //all must be processed - dl_push_constant.process_offset = 0; - dl_push_constant.process_increment = 1; + _sdfgi_update_cascades(p_render_buffers); //need cascades updated for this SDGIShader::Light lights[SDFGI::MAX_STATIC_LIGHTS]; + uint32_t light_count[SDFGI::MAX_STATIC_LIGHTS]; for (uint32_t i = 0; i < p_cascade_count; i++) { ERR_CONTINUE(p_cascade_indices[i] >= rb->sdfgi->cascades.size()); @@ -7792,18 +8094,45 @@ void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uin lights[idx].has_shadow = storage->light_has_shadow(li->light); lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); - lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); - lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); + lights[idx].inv_spot_attenuation = 1.0f / storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); idx++; } if (idx > 0) { - RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, true); + RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights); } - dl_push_constant.light_count = idx; + + light_count[i] = idx; } + } + + /* Static Lights */ + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_STATIC]); + + SDGIShader::DirectLightPushConstant dl_push_constant; + dl_push_constant.grid_size[0] = rb->sdfgi->cascade_size; + dl_push_constant.grid_size[1] = rb->sdfgi->cascade_size; + dl_push_constant.grid_size[2] = rb->sdfgi->cascade_size; + dl_push_constant.max_cascades = rb->sdfgi->cascades.size(); + dl_push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + dl_push_constant.multibounce = false; // this is static light, do not multibounce yet + dl_push_constant.y_mult = rb->sdfgi->y_mult; + + //all must be processed + dl_push_constant.process_offset = 0; + dl_push_constant.process_increment = 1; + + for (uint32_t i = 0; i < p_cascade_count; i++) { + ERR_CONTINUE(p_cascade_indices[i] >= rb->sdfgi->cascades.size()); + + SDFGI::Cascade &cc = rb->sdfgi->cascades[p_cascade_indices[i]]; + + dl_push_constant.light_count = light_count[i]; dl_push_constant.cascade = p_cascade_indices[i]; if (dl_push_constant.light_count > 0) { @@ -7814,6 +8143,8 @@ void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uin } RD::get_singleton()->compute_list_end(); + + RD::get_singleton()->draw_command_end_label(); } bool RendererSceneRenderRD::free(RID p_rid) { @@ -7827,6 +8158,9 @@ bool RendererSceneRenderRD::free(RID p_rid) { if (rb->volumetric_fog) { _volumetric_fog_erase(rb); } + if (rb->cluster_builder) { + memdelete(rb->cluster_builder); + } render_buffers_owner.free(p_rid); } else if (environment_owner.owns(p_rid)) { //not much to delete, just free it @@ -7836,6 +8170,10 @@ bool RendererSceneRenderRD::free(RID p_rid) { camera_effects_owner.free(p_rid); } else if (reflection_atlas_owner.owns(p_rid)) { reflection_atlas_set_size(p_rid, 0, 0); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(p_rid); + if (ra->cluster_builder) { + memdelete(ra->cluster_builder); + } reflection_atlas_owner.free(p_rid); } else if (reflection_probe_instance_owner.owns(p_rid)) { //not much to delete, just free it @@ -7844,6 +8182,8 @@ bool RendererSceneRenderRD::free(RID p_rid) { reflection_probe_instance_owner.free(p_rid); } else if (decal_instance_owner.owns(p_rid)) { decal_instance_owner.free(p_rid); + } else if (lightmap_instance_owner.owns(p_rid)) { + lightmap_instance_owner.free(p_rid); } else if (gi_probe_instance_owner.owns(p_rid)) { GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_rid); if (gi_probe->texture.is_valid()) { @@ -7979,23 +8319,28 @@ TypedArray<Image> RendererSceneRenderRD::bake_render_uv2(RID p_base, const Vecto //RID sampled_light; - InstanceBase ins; + GeometryInstance *gi = geometry_instance_create(p_base); + + uint32_t sc = RSG::storage->mesh_get_surface_count(p_base); + Vector<RID> materials; + materials.resize(sc); - ins.base_type = RSG::storage->get_base_type(p_base); - ins.base = p_base; - ins.materials.resize(RSG::storage->mesh_get_surface_count(p_base)); - for (int i = 0; i < ins.materials.size(); i++) { - if (i < p_material_overrides.size()) { - ins.materials.write[i] = p_material_overrides[i]; + for (uint32_t i = 0; i < sc; i++) { + if (i < (uint32_t)p_material_overrides.size()) { + materials.write[i] = p_material_overrides[i]; } } + geometry_instance_set_surface_materials(gi, materials); + if (cull_argument.size() == 0) { cull_argument.push_back(nullptr); } - cull_argument[0] = &ins; + cull_argument[0] = gi; _render_uv2(cull_argument, fb, Rect2i(0, 0, p_image_size.width, p_image_size.height)); + geometry_instance_free(gi); + TypedArray<Image> ret; { @@ -8047,20 +8392,17 @@ void RendererSceneRenderRD::sdfgi_set_debug_probe_select(const Vector3 &p_positi RendererSceneRenderRD *RendererSceneRenderRD::singleton = nullptr; -RID RendererSceneRenderRD::get_cluster_builder_texture() { - return cluster.builder.get_cluster_texture(); -} - -RID RendererSceneRenderRD::get_cluster_builder_indices_buffer() { - return cluster.builder.get_cluster_indices_buffer(); -} - RID RendererSceneRenderRD::get_reflection_probe_buffer() { return cluster.reflection_buffer; } -RID RendererSceneRenderRD::get_positional_light_buffer() { - return cluster.light_buffer; +RID RendererSceneRenderRD::get_omni_light_buffer() { + return cluster.omni_light_buffer; } + +RID RendererSceneRenderRD::get_spot_light_buffer() { + return cluster.spot_light_buffer; +} + RID RendererSceneRenderRD::get_directional_light_buffer() { return cluster.directional_light_buffer; } @@ -8076,13 +8418,21 @@ bool RendererSceneRenderRD::is_low_end() const { } RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { + max_cluster_elements = GLOBAL_GET("rendering/cluster_builder/max_clustered_elements"); + storage = p_storage; singleton = this; roughness_layers = GLOBAL_GET("rendering/quality/reflections/roughness_layers"); sky_ggx_samples_quality = GLOBAL_GET("rendering/quality/reflections/ggx_samples"); sky_use_cubemap_array = GLOBAL_GET("rendering/quality/reflections/texture_array_reflections"); - // sky_use_cubemap_array = false; + + sdfgi_ray_count = RS::EnvironmentSDFGIRayCount(CLAMP(int32_t(GLOBAL_GET("rendering/sdfgi/probe_ray_count")), 0, int32_t(RS::ENV_SDFGI_RAY_COUNT_MAX - 1))); + sdfgi_frames_to_converge = RS::EnvironmentSDFGIFramesToConverge(CLAMP(int32_t(GLOBAL_GET("rendering/sdfgi/frames_to_converge")), 0, int32_t(RS::ENV_SDFGI_CONVERGE_MAX - 1))); + sdfgi_frames_to_update_light = RS::EnvironmentSDFGIFramesToUpdateLight(CLAMP(int32_t(GLOBAL_GET("rendering/sdfgi/frames_to_update_lights")), 0, int32_t(RS::ENV_SDFGI_UPDATE_LIGHT_MAX - 1))); + + directional_shadow.size = GLOBAL_GET("rendering/quality/directional_shadow/size"); + directional_shadow.use_16_bits = GLOBAL_GET("rendering/quality/directional_shadow/16_bits"); uint32_t textures_per_stage = RD::get_singleton()->limit_get(RD::LIMIT_MAX_TEXTURES_PER_SHADER_STAGE); @@ -8376,6 +8726,9 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { //calculate tables String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; defines += "\n#define SH_SIZE " + itos(SDFGI::SH_SIZE) + "\n"; + if (sky_use_cubemap_array) { + defines += "\n#define USE_CUBEMAP_ARRAY\n"; + } Vector<String> integrate_modes; integrate_modes.push_back("\n#define MODE_PROCESS\n"); @@ -8410,11 +8763,18 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { sdfgi_shader.integrate_default_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); } } + //GK { //calculate tables String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; Vector<String> gi_modes; - gi_modes.push_back(""); + gi_modes.push_back("\n#define USE_GIPROBES\n"); + gi_modes.push_back("\n#define USE_SDFGI\n"); + gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_GIPROBES\n"); + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_GIPROBES\n"); + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n"); + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n\n#define USE_GIPROBES\n"); + gi.shader.initialize(gi_modes, defines); gi.shader_version = gi.shader.version_create(); for (int i = 0; i < GI::MODE_MAX; i++) { @@ -8458,31 +8818,33 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { default_giprobe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GI::GIProbeData) * RenderBuffers::MAX_GIPROBES); } - //cluster setup - uint32_t uniform_max_size = RD::get_singleton()->limit_get(RD::LIMIT_MAX_UNIFORM_BUFFER_SIZE); + { //decals + cluster.max_decals = max_cluster_elements; + uint32_t decal_buffer_size = cluster.max_decals * sizeof(Cluster::DecalData); + cluster.decals = memnew_arr(Cluster::DecalData, cluster.max_decals); + cluster.decal_sort = memnew_arr(Cluster::InstanceSort<DecalInstance>, cluster.max_decals); + cluster.decal_buffer = RD::get_singleton()->storage_buffer_create(decal_buffer_size); + } { //reflections - uint32_t reflection_buffer_size; - if (uniform_max_size < 65536) { - //Yes, you guessed right, ARM again - reflection_buffer_size = uniform_max_size; - } else { - reflection_buffer_size = 65536; - } - cluster.max_reflections = reflection_buffer_size / sizeof(Cluster::ReflectionData); + cluster.max_reflections = max_cluster_elements; cluster.reflections = memnew_arr(Cluster::ReflectionData, cluster.max_reflections); - cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(reflection_buffer_size); + cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_reflections); + cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(sizeof(Cluster::ReflectionData) * cluster.max_reflections); } { //lights - cluster.max_lights = MIN(1024 * 1024, uniform_max_size) / sizeof(Cluster::LightData); //1mb of lights + cluster.max_lights = max_cluster_elements; + uint32_t light_buffer_size = cluster.max_lights * sizeof(Cluster::LightData); - cluster.lights = memnew_arr(Cluster::LightData, cluster.max_lights); - cluster.light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.omni_lights = memnew_arr(Cluster::LightData, cluster.max_lights); + cluster.omni_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.omni_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights); + cluster.spot_lights = memnew_arr(Cluster::LightData, cluster.max_lights); + cluster.spot_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.spot_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights); //defines += "\n#define MAX_LIGHT_DATA_STRUCTS " + itos(cluster.max_lights) + "\n"; - cluster.lights_instances = memnew_arr(RID, cluster.max_lights); - cluster.lights_shadow_rect_cache = memnew_arr(Rect2i, cluster.max_lights); cluster.max_directional_lights = MAX_DIRECTIONAL_LIGHTS; uint32_t directional_light_buffer_size = cluster.max_directional_lights * sizeof(Cluster::DirectionalLightData); @@ -8490,15 +8852,6 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { cluster.directional_light_buffer = RD::get_singleton()->uniform_buffer_create(directional_light_buffer_size); } - { //decals - cluster.max_decals = MIN(1024 * 1024, uniform_max_size) / sizeof(Cluster::DecalData); //1mb of decals - uint32_t decal_buffer_size = cluster.max_decals * sizeof(Cluster::DecalData); - cluster.decals = memnew_arr(Cluster::DecalData, cluster.max_decals); - cluster.decal_buffer = RD::get_singleton()->storage_buffer_create(decal_buffer_size); - } - - cluster.builder.setup(16, 8, 24); - if (!low_end) { String defines = "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(cluster.max_directional_lights) + "\n"; Vector<String> volumetric_fog_modes; @@ -8511,6 +8864,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { for (int i = 0; i < VOLUMETRIC_FOG_SHADER_MAX; i++) { volumetric_fog.pipelines[i] = RD::get_singleton()->compute_pipeline_create(volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, i)); } + volumetric_fog.params_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(VolumetricFogShader::ParamsUBO)); } { @@ -8543,16 +8897,13 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { environment_set_volumetric_fog_volume_size(GLOBAL_GET("rendering/volumetric_fog/volume_size"), GLOBAL_GET("rendering/volumetric_fog/volume_depth")); environment_set_volumetric_fog_filter_active(GLOBAL_GET("rendering/volumetric_fog/use_filter")); - environment_set_volumetric_fog_directional_shadow_shrink_size(GLOBAL_GET("rendering/volumetric_fog/directional_shadow_shrink")); - environment_set_volumetric_fog_positional_shadow_shrink_size(GLOBAL_GET("rendering/volumetric_fog/positional_shadow_shrink")); cull_argument.set_page_pool(&cull_argument_pool); + + gi.half_resolution = GLOBAL_GET("rendering/quality/gi/use_half_resolution"); } RendererSceneRenderRD::~RendererSceneRenderRD() { - for (Map<Vector2i, ShadowMap>::Element *E = shadow_maps.front(); E; E = E->next()) { - RD::get_singleton()->free(E->get().depth); - } for (Map<int, ShadowCubemap>::Element *E = shadow_cubemaps.front(); E; E = E->next()) { RD::get_singleton()->free(E->get().cubemap); } @@ -8576,6 +8927,7 @@ RendererSceneRenderRD::~RendererSceneRenderRD() { sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); volumetric_fog.shader.version_free(volumetric_fog.shader_version); + RD::get_singleton()->free(volumetric_fog.params_ubo); memdelete_arr(gi_probe_lights); } @@ -8597,15 +8949,19 @@ RendererSceneRenderRD::~RendererSceneRenderRD() { { RD::get_singleton()->free(cluster.directional_light_buffer); - RD::get_singleton()->free(cluster.light_buffer); + RD::get_singleton()->free(cluster.omni_light_buffer); + RD::get_singleton()->free(cluster.spot_light_buffer); RD::get_singleton()->free(cluster.reflection_buffer); RD::get_singleton()->free(cluster.decal_buffer); memdelete_arr(cluster.directional_lights); - memdelete_arr(cluster.lights); - memdelete_arr(cluster.lights_shadow_rect_cache); - memdelete_arr(cluster.lights_instances); + memdelete_arr(cluster.omni_lights); + memdelete_arr(cluster.spot_lights); + memdelete_arr(cluster.omni_light_sort); + memdelete_arr(cluster.spot_light_sort); memdelete_arr(cluster.reflections); + memdelete_arr(cluster.reflection_sort); memdelete_arr(cluster.decals); + memdelete_arr(cluster.decal_sort); } RD::get_singleton()->free(shadow_sampler); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index af35e1b3b4..cdcdb73132 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -34,7 +34,7 @@ #include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" #include "servers/rendering/renderer_compositor.h" -#include "servers/rendering/renderer_rd/light_cluster_builder.h" +#include "servers/rendering/renderer_rd/cluster_builder_rd.h" #include "servers/rendering/renderer_rd/renderer_storage_rd.h" #include "servers/rendering/renderer_rd/shaders/gi.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/giprobe.glsl.gen.h" @@ -104,17 +104,22 @@ protected: }; virtual RenderBufferData *_create_render_buffer_data() = 0; - void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count); + void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count); void _setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform); void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment); void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used); - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; - virtual void _render_shadow(RID p_framebuffer, const PagedArray<InstanceBase *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) = 0; - virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; - virtual void _render_uv2(const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; - virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<InstanceBase *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; - virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<InstanceBase *> &p_instances) = 0; + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; + + virtual void _render_shadow_begin() = 0; + virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0; + virtual void _render_shadow_process() = 0; + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL) = 0; + + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; + virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; + virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; + virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances) = 0; virtual void _debug_giprobe(RID p_gi_probe, RenderingDevice::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); void _debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform); @@ -124,8 +129,6 @@ protected: virtual void _base_uniforms_changed() = 0; virtual void _render_buffers_uniform_set_changed(RID p_render_buffers) = 0; virtual RID _render_buffers_get_normal_texture(RID p_render_buffers) = 0; - virtual RID _render_buffers_get_ambient_texture(RID p_render_buffers) = 0; - virtual RID _render_buffers_get_reflection_texture(RID p_render_buffers) = 0; void _process_ssao(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection); void _process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive); @@ -134,11 +137,19 @@ protected: void _setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size); void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); - void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes); + void _pre_process_gi(RID p_render_buffers, const Transform &p_transform); + void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes); + + bool _needs_post_prepass_render(bool p_use_gi); + void _post_prepass_render(bool p_use_gi); + void _pre_resolve_render(bool p_use_gi); + + void _pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer); + uint32_t _get_render_state_directional_light_count() const; // needed for a single argument calls (material and uv2) - PagedArrayPool<InstanceBase *> cull_argument_pool; - PagedArray<InstanceBase *> cull_argument; //need this to exist + PagedArrayPool<GeometryInstance *> cull_argument_pool; + PagedArray<GeometryInstance *> cull_argument; //need this to exist private: RS::ViewportDebugDraw debug_draw = RS::VIEWPORT_DEBUG_DRAW_DISABLED; double time_step = 0; @@ -233,6 +244,7 @@ private: virtual bool is_animated() const; virtual bool casts_shadows() const; virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; SkyShaderData(); virtual ~SkyShaderData(); }; @@ -340,6 +352,8 @@ private: }; Vector<Reflection> reflections; + + ClusterBuilderRD *cluster_builder = nullptr; }; mutable RID_Owner<ReflectionAtlas> reflection_atlas_owner; @@ -374,6 +388,15 @@ private: mutable RID_Owner<DecalInstance> decal_instance_owner; + /* LIGHTMAP INSTANCE */ + + struct LightmapInstance { + RID lightmap; + Transform transform; + }; + + mutable RID_Owner<LightmapInstance> lightmap_instance_owner; + /* GIPROBE INSTANCE */ struct GIProbeLight { @@ -383,10 +406,10 @@ private: float attenuation; float color[3]; - float spot_angle_radians; + float cos_spot_angle; float position[3]; - float spot_attenuation; + float inv_spot_attenuation; float direction[3]; uint32_t has_shadow; @@ -562,17 +585,18 @@ private: uint32_t smallest_subdiv = 0; int size = 0; + bool use_16_bits = false; RID depth; RID fb; //for copying Map<RID, uint32_t> shadow_owners; - - Vector<ShadowShrinkStage> shrink_stages; }; RID_Owner<ShadowAtlas> shadow_atlas_owner; + void _update_shadow_atlas(ShadowAtlas *shadow_atlas); + bool _shadow_atlas_find_shadow(ShadowAtlas *shadow_atlas, int *p_in_quadrants, int p_quadrant_count, int p_current_subdiv, uint64_t p_tick, int &r_quadrant, int &r_shadow); RS::ShadowQuality shadows_quality = RS::SHADOW_QUALITY_MAX; //So it always updates when first set @@ -593,17 +617,16 @@ private: struct DirectionalShadow { RID depth; + RID fb; //when renderign direct int light_count = 0; int size = 0; + bool use_16_bits = false; int current_light = 0; - Vector<ShadowShrinkStage> shrink_stages; - } directional_shadow; - void _allocate_shadow_shrink_stages(RID p_base, int p_base_size, Vector<ShadowShrinkStage> &shrink_stages, uint32_t p_target_size); - void _clear_shadow_shrink_stages(Vector<ShadowShrinkStage> &shrink_stages); + void _update_directional_shadow_atlas(); /* SHADOW CUBEMAPS */ @@ -615,14 +638,6 @@ private: Map<int, ShadowCubemap> shadow_cubemaps; ShadowCubemap *_get_shadow_cubemap(int p_size); - struct ShadowMap { - RID depth; - RID fb; - }; - - Map<Vector2i, ShadowMap> shadow_maps; - ShadowMap *_get_shadow_map(const Size2i &p_size); - void _create_shadow_cubemaps(); /* LIGHT INSTANCE */ @@ -642,7 +657,7 @@ private: RS::LightType light_type = RS::LIGHT_DIRECTIONAL; - ShadowTransform shadow_transform[4]; + ShadowTransform shadow_transform[6]; AABB aabb; RID self; @@ -721,8 +736,9 @@ private: float volumetric_fog_light_energy = 0.0; float volumetric_fog_length = 64.0; float volumetric_fog_detail_spread = 2.0; - RS::EnvVolumetricFogShadowFilter volumetric_fog_shadow_filter = RS::ENV_VOLUMETRIC_FOG_SHADOW_FILTER_LOW; float volumetric_fog_gi_inject = 0.0; + bool volumetric_fog_temporal_reprojection = true; + float volumetric_fog_temporal_reprojection_amount = 0.9; /// Glow @@ -823,6 +839,9 @@ private: /* RENDER BUFFERS */ + ClusterBuilderSharedDataRD cluster_builder_shared; + ClusterBuilderRD *current_cluster_builder = nullptr; + struct SDFGI; struct VolumetricFog; @@ -848,6 +867,8 @@ private: SDFGI *sdfgi = nullptr; VolumetricFog *volumetric_fog = nullptr; + ClusterBuilderRD *cluster_builder = nullptr; + //built-in textures used for ping pong image processing and blurring struct Blur { RID texture; @@ -887,6 +908,16 @@ private: RID giprobe_textures[MAX_GIPROBES]; RID giprobe_buffer; + + RID ambient_buffer; + RID reflection_buffer; + bool using_half_size_gi = false; + + struct GI { + RID full_buffer; + RID full_dispatch; + RID full_mask; + } gi; }; RID default_giprobe_buffer; @@ -948,6 +979,8 @@ private: RID scroll_occlusion_uniform_set; RID integrate_uniform_set; RID lights_buffer; + + bool all_dynamic_lights_dirty = true; }; //used for rendering (voxelization) @@ -1005,10 +1038,18 @@ private: float y_mult = 1.0; uint32_t render_pass = 0; + + int32_t cascade_dynamic_light_count[SDFGI::MAX_CASCADES]; //used dynamically }; + void _sdfgi_update_light(RID p_render_buffers, RID p_environment); + void _sdfgi_update_probes(RID p_render_buffers, RID p_environment); + void _sdfgi_store_probes(RID p_render_buffers); + RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_10_FRAMES; + RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES; + float sdfgi_solid_cell_ratio = 0.25; Vector3 sdfgi_debug_probe_pos; Vector3 sdfgi_debug_probe_dir; @@ -1105,8 +1146,8 @@ private: float attenuation; uint32_t type; - float spot_angle; - float spot_attenuation; + float cos_spot_angle; + float inv_spot_attenuation; float radius; float shadow_color[4]; @@ -1246,23 +1287,28 @@ private: float z_far; float proj_info[4]; - + float ao_color[3]; uint32_t max_giprobes; + uint32_t high_quality_vct; - uint32_t use_sdfgi; uint32_t orthogonal; - - float ao_color[3]; - uint32_t pad; + uint32_t pad[2]; float cam_rotation[12]; }; RID sdfgi_ubo; - enum { - MODE_MAX = 1 + enum Mode { + MODE_GIPROBE, + MODE_SDFGI, + MODE_COMBINED, + MODE_HALF_RES_GIPROBE, + MODE_HALF_RES_SDFGI, + MODE_HALF_RES_COMBINED, + MODE_MAX }; + bool half_resolution = false; GiShaderRD shader; RID shader_version; RID pipelines[MODE_MAX]; @@ -1287,14 +1333,23 @@ private: struct Cluster { /* Scene State UBO */ - struct ReflectionData { //should always be 128 bytes + enum { + REFLECTION_AMBIENT_DISABLED = 0, + REFLECTION_AMBIENT_ENVIRONMENT = 1, + REFLECTION_AMBIENT_COLOR = 2, + }; + + struct ReflectionData { float box_extents[3]; float index; float box_offset[3]; uint32_t mask; - float params[4]; // intensity, 0, interior , boxproject float ambient[3]; // ambient color, + float intensity; + bool exterior; + bool box_project; uint32_t ambient_mode; + uint32_t pad; float local_matrix[16]; // up to here for spot and omni, rest is for directional }; @@ -1303,10 +1358,15 @@ private: float inv_radius; float direction[3]; float size; - uint16_t attenuation_energy[2]; //16 bits attenuation, then energy - uint8_t color_specular[4]; //rgb color, a specular (8 bit unorm) - uint16_t cone_attenuation_angle[2]; // attenuation and angle, (16bit float) - uint8_t shadow_color_enabled[4]; //shadow rgb color, a>0.5 enabled (8bit unorm) + + float color[3]; + float attenuation; + + float inv_spot_attenuation; + float cos_spot_angle; + float specular_amount; + uint32_t shadow_enabled; + float atlas_rect[4]; // in omni, used for atlas uv, in spot, used for projector uv float shadow_matrix[16]; float shadow_bias; @@ -1370,31 +1430,85 @@ private: float normal_fade; }; + template <class T> + struct InstanceSort { + float depth; + T *instance; + bool operator<(const InstanceSort &p_sort) const { + return depth < p_sort.depth; + } + }; + ReflectionData *reflections; + InstanceSort<ReflectionProbeInstance> *reflection_sort; uint32_t max_reflections; RID reflection_buffer; uint32_t max_reflection_probes_per_instance; + uint32_t reflection_count = 0; DecalData *decals; + InstanceSort<DecalInstance> *decal_sort; uint32_t max_decals; RID decal_buffer; + uint32_t decal_count; + + LightData *omni_lights; + LightData *spot_lights; - LightData *lights; + InstanceSort<LightInstance> *omni_light_sort; + InstanceSort<LightInstance> *spot_light_sort; uint32_t max_lights; - RID light_buffer; - RID *lights_instances; - Rect2i *lights_shadow_rect_cache; - uint32_t lights_shadow_rect_cache_count = 0; + RID omni_light_buffer; + RID spot_light_buffer; + uint32_t omni_light_count = 0; + uint32_t spot_light_count = 0; DirectionalLightData *directional_lights; uint32_t max_directional_lights; RID directional_light_buffer; - LightClusterBuilder builder; - } cluster; + struct RenderState { + RID render_buffers; + Transform cam_transform; + CameraMatrix cam_projection; + bool cam_ortogonal = false; + const PagedArray<GeometryInstance *> *instances = nullptr; + const PagedArray<RID> *lights = nullptr; + const PagedArray<RID> *reflection_probes = nullptr; + const PagedArray<RID> *gi_probes = nullptr; + const PagedArray<RID> *decals = nullptr; + const PagedArray<RID> *lightmaps = nullptr; + RID environment; + RID camera_effects; + RID shadow_atlas; + RID reflection_atlas; + RID reflection_probe; + int reflection_probe_pass = 0; + float screen_lod_threshold = 0.0; + + const RenderShadowData *render_shadows = nullptr; + int render_shadow_count = 0; + const RenderSDFGIData *render_sdfgi_regions = nullptr; + int render_sdfgi_region_count = 0; + const RenderSDFGIUpdateData *sdfgi_update_data = nullptr; + + uint32_t directional_light_count = 0; + uint32_t gi_probe_count = 0; + + LocalVector<int> cube_shadows; + LocalVector<int> shadows; + LocalVector<int> directional_shadows; + + bool depth_prepass_used; + } render_state; + struct VolumetricFog { + enum { + MAX_TEMPORAL_FRAMES = 16 + }; + uint32_t width = 0; uint32_t height = 0; uint32_t depth = 0; @@ -1403,6 +1517,8 @@ private: float spread; RID light_density_map; + RID prev_light_density_map; + RID fog_map; RID uniform_set; RID uniform_set2; @@ -1410,6 +1526,8 @@ private: RID sky_uniform_set; int last_shadow_filter = -1; + + Transform prev_cam_transform; }; enum { @@ -1421,7 +1539,7 @@ private: }; struct VolumetricFogShader { - struct PushConstant { + struct ParamsUBO { float fog_frustum_size_begin[2]; float fog_frustum_size_end[2]; @@ -1439,13 +1557,24 @@ private: float detail_spread; float gi_inject; uint32_t max_gi_probes; - uint32_t pad; + uint32_t cluster_type_size; + + float screen_size[2]; + uint32_t cluster_shift; + uint32_t cluster_width; + + uint32_t max_cluster_element_count_div_32; + uint32_t use_temporal_reprojection; + uint32_t temporal_frame; + float temporal_blend; float cam_rotation[12]; + float to_prev_view[16]; }; VolumetricFogShaderRD shader; + RID params_ubo; RID shader_version; RID pipelines[VOLUMETRIC_FOG_SHADER_MAX]; @@ -1453,9 +1582,7 @@ private: uint32_t volumetric_fog_depth = 128; uint32_t volumetric_fog_size = 128; - bool volumetric_fog_filter_active = false; - uint32_t volumetric_fog_directional_shadow_shrink = 512; - uint32_t volumetric_fog_positional_shadow_shrink = 512; + bool volumetric_fog_filter_active = true; void _volumetric_fog_erase(RenderBuffers *rb); void _update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_gi_probe_count); @@ -1470,13 +1597,21 @@ private: float weight; }; + uint32_t max_cluster_elements = 512; bool low_end = false; + void _render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0, bool p_open_pass = true, bool p_close_pass = true, bool p_clear_region = true); + void _render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances); + void _render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result); + public: + virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance) = 0; + virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance) = 0; + /* SHADOW ATLAS API */ RID shadow_atlas_create(); - void shadow_atlas_set_size(RID p_atlas, int p_size); + void shadow_atlas_set_size(RID p_atlas, int p_size, bool p_16_bits = false); void shadow_atlas_set_quadrant_subdivision(RID p_atlas, int p_quadrant, int p_subdivision); bool shadow_atlas_update_light(RID p_atlas, RID p_light_intance, float p_coverage, uint64_t p_light_version); _FORCE_INLINE_ bool shadow_atlas_owns_light_instance(RID p_atlas, RID p_light_intance) { @@ -1497,7 +1632,7 @@ public: return Size2(atlas->size, atlas->size); } - void directional_shadow_atlas_set_size(int p_size); + void directional_shadow_atlas_set_size(int p_size, bool p_16_bits = false); int get_directional_light_shadow_size(RID p_light_intance); void set_directional_shadow_count(int p_count); @@ -1516,7 +1651,6 @@ public: virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const; virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const; virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const; - virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count); RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; } /* SKY API */ @@ -1573,12 +1707,10 @@ public: float environment_get_fog_height_density(RID p_env) const; float environment_get_fog_aerial_perspective(RID p_env) const; - void environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, const Color &p_light, float p_light_energy, float p_length, float p_detail_spread, float p_gi_inject, RS::EnvVolumetricFogShadowFilter p_shadow_filter); + void environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, const Color &p_light, float p_light_energy, float p_length, float p_detail_spread, float p_gi_inject, bool p_temporal_reprojection, float p_temporal_reprojection_amount); virtual void environment_set_volumetric_fog_volume_size(int p_size, int p_depth); virtual void environment_set_volumetric_fog_filter_active(bool p_enable); - virtual void environment_set_volumetric_fog_directional_shadow_shrink_size(int p_shrink_size); - virtual void environment_set_volumetric_fog_positional_shadow_shrink_size(int p_shrink_size); void environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance); void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_power, float p_detail, float p_horizon, float p_sharpness, float p_light_affect, float p_ao_channel_affect); @@ -1592,6 +1724,7 @@ public: virtual void environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, float p_energy, float p_normal_bias, float p_probe_bias); virtual void environment_set_sdfgi_ray_count(RS::EnvironmentSDFGIRayCount p_ray_count); virtual void environment_set_sdfgi_frames_to_converge(RS::EnvironmentSDFGIFramesToConverge p_frames); + virtual void environment_set_sdfgi_frames_to_update_light(RS::EnvironmentSDFGIFramesToUpdateLight p_update); void environment_set_ssr_roughness_quality(RS::EnvironmentSSRRoughnessQuality p_quality); RS::EnvironmentSSRRoughnessQuality environment_get_ssr_roughness_quality() const; @@ -1822,10 +1955,25 @@ public: return decal->transform; } + virtual RID lightmap_instance_create(RID p_lightmap); + virtual void lightmap_instance_set_transform(RID p_lightmap, const Transform &p_transform); + _FORCE_INLINE_ bool lightmap_instance_is_valid(RID p_lightmap_instance) { + return lightmap_instance_owner.getornull(p_lightmap_instance) != nullptr; + } + + _FORCE_INLINE_ RID lightmap_instance_get_lightmap(RID p_lightmap_instance) { + LightmapInstance *li = lightmap_instance_owner.getornull(p_lightmap_instance); + return li->lightmap; + } + _FORCE_INLINE_ Transform lightmap_instance_get_transform(RID p_lightmap_instance) { + LightmapInstance *li = lightmap_instance_owner.getornull(p_lightmap_instance); + return li->transform; + } + RID gi_probe_instance_create(RID p_base); void gi_probe_instance_set_transform_to_data(RID p_probe, const Transform &p_xform); bool gi_probe_needs_update(RID p_probe) const; - void gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<RendererSceneRender::InstanceBase *> &p_dynamic_objects); + void gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<RendererSceneRender::GeometryInstance *> &p_dynamic_objects); void gi_probe_set_quality(RS::GIProbeQuality p_quality) { gi_probe_quality = p_quality; } @@ -1875,11 +2023,14 @@ public: */ RID render_buffers_create(); void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_width, int p_height, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding); + void gi_set_use_half_resolution(bool p_enable); RID render_buffers_get_ao_texture(RID p_render_buffers); RID render_buffers_get_back_buffer_texture(RID p_render_buffers); RID render_buffers_get_gi_probe_buffer(RID p_render_buffers); RID render_buffers_get_default_gi_probe_buffer(); + RID render_buffers_get_gi_ambient_texture(RID p_render_buffers); + RID render_buffers_get_gi_reflection_texture(RID p_render_buffers); uint32_t render_buffers_get_sdfgi_cascade_count(RID p_render_buffers) const; bool render_buffers_is_sdfgi_enabled(RID p_render_buffers) const; @@ -1900,16 +2051,11 @@ public: float render_buffers_get_volumetric_fog_end(RID p_render_buffers); float render_buffers_get_volumetric_fog_detail_spread(RID p_render_buffers); - void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); - - void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<InstanceBase *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0); - - void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr); - void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<InstanceBase *> &p_instances); - void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result); + void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); - void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<InstanceBase *> &p_instances); + void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances); virtual void set_scene_pass(uint64_t p_pass) { scene_pass = p_pass; @@ -1960,10 +2106,9 @@ public: virtual void set_time(double p_time, double p_step); - RID get_cluster_builder_texture(); - RID get_cluster_builder_indices_buffer(); RID get_reflection_probe_buffer(); - RID get_positional_light_buffer(); + RID get_omni_light_buffer(); + RID get_spot_light_buffer(); RID get_directional_light_buffer(); RID get_decal_buffer(); int get_max_directional_lights() const; diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index 68983da408..a1358f94fa 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -873,7 +873,7 @@ void RendererStorageRD::_texture_2d_update(RID p_texture, const Ref<Image> &p_im TextureToRDFormat f; Ref<Image> validated = _validate_texture_format(p_image, f); - RD::get_singleton()->texture_update(tex->rd_texture, p_layer, validated->get_data(), !p_immediate); + RD::get_singleton()->texture_update(tex->rd_texture, p_layer, validated->get_data()); } void RendererStorageRD::texture_2d_update_immediate(RID p_texture, const Ref<Image> &p_image, int p_layer) { @@ -918,7 +918,7 @@ void RendererStorageRD::texture_3d_update(RID p_texture, const Vector<Ref<Image> } } - RD::get_singleton()->texture_update(tex->rd_texture, 0, all_data, true); + RD::get_singleton()->texture_update(tex->rd_texture, 0, all_data); } void RendererStorageRD::texture_proxy_update(RID p_texture, RID p_proxy_to) { @@ -1438,7 +1438,7 @@ void RendererStorageRD::shader_set_code(RID p_shader, const String &p_code) { for (Set<Material *>::Element *E = shader->owners.front(); E; E = E->next()) { Material *material = E->get(); - material->instance_dependency.instance_notify_changed(false, true); + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); _material_queue_update(material, true, true); } } @@ -1499,6 +1499,15 @@ void RendererStorageRD::shader_set_data_request_function(ShaderType p_shader_typ shader_data_request_func[p_shader_type] = p_function; } +RS::ShaderNativeSourceCode RendererStorageRD::shader_get_native_source_code(RID p_shader) const { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND_V(!shader, RS::ShaderNativeSourceCode()); + if (shader->data) { + return shader->data->get_native_source_code(); + } + return RS::ShaderNativeSourceCode(); +} + /* COMMON MATERIAL API */ RID RendererStorageRD::material_create() { @@ -1547,7 +1556,8 @@ void RendererStorageRD::material_set_shader(RID p_material, RID p_shader) { } if (p_shader.is_null()) { - material->instance_dependency.instance_notify_changed(false, true); + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); + material->shader_id = 0; return; } @@ -1555,6 +1565,7 @@ void RendererStorageRD::material_set_shader(RID p_material, RID p_shader) { ERR_FAIL_COND(!shader); material->shader = shader; material->shader_type = shader->type; + material->shader_id = p_shader.get_local_index(); shader->owners.insert(material); if (shader->type == SHADER_TYPE_MAX) { @@ -1568,7 +1579,7 @@ void RendererStorageRD::material_set_shader(RID p_material, RID p_shader) { material->data->set_next_pass(material->next_pass); material->data->set_render_priority(material->priority); //updating happens later - material->instance_dependency.instance_notify_changed(false, true); + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); _material_queue_update(material, true, true); } @@ -1613,7 +1624,7 @@ void RendererStorageRD::material_set_next_pass(RID p_material, RID p_next_materi material->data->set_next_pass(p_next_material); } - material->instance_dependency.instance_notify_changed(false, true); + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); } void RendererStorageRD::material_set_render_priority(RID p_material, int priority) { @@ -1663,10 +1674,10 @@ void RendererStorageRD::material_get_instance_shader_parameters(RID p_material, } } -void RendererStorageRD::material_update_dependency(RID p_material, InstanceBaseDependency *p_instance) { +void RendererStorageRD::material_update_dependency(RID p_material, DependencyTracker *p_instance) { Material *material = material_owner.getornull(p_material); ERR_FAIL_COND(!material); - p_instance->update_dependency(&material->instance_dependency); + p_instance->update_dependency(&material->dependency); if (material->next_pass.is_valid()) { material_update_dependency(material->next_pass, p_instance); } @@ -2596,7 +2607,13 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su _mesh_instance_add_surface(mi, mesh, mesh->surface_count - 1); } - mesh->instance_dependency.instance_notify_changed(true, true); + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + + for (Set<Mesh *>::Element *E = mesh->shadow_owners.front(); E; E = E->next()) { + Mesh *shadow_owner = E->get(); + shadow_owner->shadow_mesh = RID(); + shadow_owner->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + } mesh->material_cache.clear(); } @@ -2638,7 +2655,7 @@ void RendererStorageRD::mesh_surface_set_material(RID p_mesh, int p_surface, RID ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); mesh->surfaces[p_surface]->material = p_material; - mesh->instance_dependency.instance_notify_changed(false, true); + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); mesh->material_cache.clear(); } @@ -2813,6 +2830,25 @@ AABB RendererStorageRD::mesh_get_aabb(RID p_mesh, RID p_skeleton) { return aabb; } +void RendererStorageRD::mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + + Mesh *shadow_mesh = mesh_owner.getornull(mesh->shadow_mesh); + if (shadow_mesh) { + shadow_mesh->shadow_owners.erase(mesh); + } + mesh->shadow_mesh = p_shadow_mesh; + + shadow_mesh = mesh_owner.getornull(mesh->shadow_mesh); + + if (shadow_mesh) { + shadow_mesh->shadow_owners.insert(mesh); + } + + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); +} + void RendererStorageRD::mesh_clear(RID p_mesh) { Mesh *mesh = mesh_owner.getornull(p_mesh); ERR_FAIL_COND(!mesh); @@ -2858,8 +2894,14 @@ void RendererStorageRD::mesh_clear(RID p_mesh) { MeshInstance *mi = E->get(); _mesh_instance_clear(mi); } - mesh->instance_dependency.instance_notify_changed(true, true); mesh->has_bone_weights = false; + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + + for (Set<Mesh *>::Element *E = mesh->shadow_owners.front(); E; E = E->next()) { + Mesh *shadow_owner = E->get(); + shadow_owner->shadow_mesh = RID(); + shadow_owner->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + } } bool RendererStorageRD::mesh_needs_instance(RID p_mesh, bool p_has_skeleton) { @@ -3002,7 +3044,7 @@ void RendererStorageRD::update_mesh_instances() { MeshInstance *mi = dirty_mesh_instance_weights.first()->self(); if (mi->blend_weights_buffer.is_valid()) { - RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr(), true); + RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr()); } dirty_mesh_instance_weights.remove(&mi->weight_update_list); mi->weights_dirty = false; @@ -3056,7 +3098,7 @@ void RendererStorageRD::update_mesh_instances() { RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant)); //dispatch without barrier, so all is done at the same time - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1); } mi->dirty = false; @@ -3298,6 +3340,8 @@ void RendererStorageRD::multimesh_allocate(RID p_multimesh, int p_instances, RS: if (multimesh->instances) { multimesh->buffer = RD::get_singleton()->storage_buffer_create(multimesh->instances * multimesh->stride_cache * 4); } + + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_MULTIMESH); } int RendererStorageRD::multimesh_get_instance_count(RID p_multimesh) const { @@ -3331,7 +3375,7 @@ void RendererStorageRD::multimesh_set_mesh(RID p_multimesh, RID p_mesh) { } } - multimesh->instance_dependency.instance_notify_changed(true, true); + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); } #define MULTIMESH_DIRTY_REGION_SIZE 512 @@ -3668,7 +3712,7 @@ void RendererStorageRD::multimesh_set_buffer(RID p_multimesh, const Vector<float { const float *r = p_buffer.ptr(); - RD::get_singleton()->buffer_update(multimesh->buffer, 0, p_buffer.size() * sizeof(float), r, false); + RD::get_singleton()->buffer_update(multimesh->buffer, 0, p_buffer.size() * sizeof(float), r); multimesh->buffer_set = true; } @@ -3690,7 +3734,7 @@ void RendererStorageRD::multimesh_set_buffer(RID p_multimesh, const Vector<float const float *data = p_buffer.ptr(); _multimesh_re_create_aabb(multimesh, data, multimesh->instances); - multimesh->instance_dependency.instance_notify_changed(true, false); + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } } @@ -3731,6 +3775,8 @@ void RendererStorageRD::multimesh_set_visible_instances(RID p_multimesh, int p_v } multimesh->visible_instances = p_visible; + + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_MULTIMESH_VISIBLE_INSTANCES); } int RendererStorageRD::multimesh_get_visible_instances(RID p_multimesh) const { @@ -3765,14 +3811,14 @@ void RendererStorageRD::_update_dirty_multimeshes() { if (multimesh->data_cache_used_dirty_regions > 32 || multimesh->data_cache_used_dirty_regions > visible_region_count / 2) { //if there too many dirty regions, or represent the majority of regions, just copy all, else transfer cost piles up too much - RD::get_singleton()->buffer_update(multimesh->buffer, 0, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data, false); + RD::get_singleton()->buffer_update(multimesh->buffer, 0, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data); } else { //not that many regions? update them all for (uint32_t i = 0; i < visible_region_count; i++) { if (multimesh->data_cache_dirty_regions[i]) { uint64_t offset = i * region_size; uint64_t size = multimesh->stride_cache * multimesh->instances * sizeof(float); - RD::get_singleton()->buffer_update(multimesh->buffer, offset, MIN(region_size, size - offset), &data[i * region_size], false); + RD::get_singleton()->buffer_update(multimesh->buffer, offset, MIN(region_size, size - offset), &data[i * region_size]); } } } @@ -3788,7 +3834,7 @@ void RendererStorageRD::_update_dirty_multimeshes() { //aabb is dirty.. _multimesh_re_create_aabb(multimesh, data, visible_instances); multimesh->aabb_dirty = false; - multimesh->instance_dependency.instance_notify_changed(true, false); + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } } @@ -3926,7 +3972,7 @@ void RendererStorageRD::particles_set_custom_aabb(RID p_particles, const AABB &p Particles *particles = particles_owner.getornull(p_particles); ERR_FAIL_COND(!particles); particles->custom_aabb = p_aabb; - particles->instance_dependency.instance_notify_changed(true, false); + particles->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } void RendererStorageRD::particles_set_speed_scale(RID p_particles, float p_scale) { @@ -4155,24 +4201,18 @@ RID RendererStorageRD::particles_get_draw_pass_mesh(RID p_particles, int p_pass) return particles->draw_passes[p_pass]; } -void RendererStorageRD::particles_add_collision(RID p_particles, InstanceBaseDependency *p_instance) { - RendererSceneRender::InstanceBase *instance = static_cast<RendererSceneRender::InstanceBase *>(p_instance); - +void RendererStorageRD::particles_add_collision(RID p_particles, RID p_particles_collision_instance) { Particles *particles = particles_owner.getornull(p_particles); ERR_FAIL_COND(!particles); - ERR_FAIL_COND(instance->base_type != RS::INSTANCE_PARTICLES_COLLISION); - - particles->collisions.insert(instance); + particles->collisions.insert(p_particles_collision_instance); } -void RendererStorageRD::particles_remove_collision(RID p_particles, InstanceBaseDependency *p_instance) { - RendererSceneRender::InstanceBase *instance = static_cast<RendererSceneRender::InstanceBase *>(p_instance); - +void RendererStorageRD::particles_remove_collision(RID p_particles, RID p_particles_collision_instance) { Particles *particles = particles_owner.getornull(p_particles); ERR_FAIL_COND(!particles); - particles->collisions.erase(instance); + particles->collisions.erase(p_particles_collision_instance); } void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta) { @@ -4272,9 +4312,15 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta to_particles = p_particles->emission_transform.affine_inverse(); } uint32_t collision_3d_textures_used = 0; - for (const Set<RendererSceneRender::InstanceBase *>::Element *E = p_particles->collisions.front(); E; E = E->next()) { - ParticlesCollision *pc = particles_collision_owner.getornull(E->get()->base); - Transform to_collider = E->get()->transform; + for (const Set<RID>::Element *E = p_particles->collisions.front(); E; E = E->next()) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.getornull(E->get()); + if (!pci || !pci->active) { + continue; + } + ParticlesCollision *pc = particles_collision_owner.getornull(pci->collision); + ERR_CONTINUE(!pc); + + Transform to_collider = pci->transform; if (p_particles->use_local_coords) { to_collider = to_particles * to_collider; } @@ -4463,7 +4509,7 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta if (sub_emitter && sub_emitter->emission_storage_buffer.is_valid()) { // print_line("updating subemitter buffer"); int32_t zero[4] = { 0, sub_emitter->amount, 0, 0 }; - RD::get_singleton()->buffer_update(sub_emitter->emission_storage_buffer, 0, sizeof(uint32_t) * 4, zero, true); + RD::get_singleton()->buffer_update(sub_emitter->emission_storage_buffer, 0, sizeof(uint32_t) * 4, zero); push_constant.can_emit = true; if (sub_emitter->emitting) { @@ -4481,13 +4527,13 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta } if (p_particles->emission_buffer && p_particles->emission_buffer->particle_count) { - RD::get_singleton()->buffer_update(p_particles->emission_storage_buffer, 0, sizeof(uint32_t) * 4 + sizeof(ParticleEmissionBuffer::Data) * p_particles->emission_buffer->particle_count, p_particles->emission_buffer, true); + RD::get_singleton()->buffer_update(p_particles->emission_storage_buffer, 0, sizeof(uint32_t) * 4 + sizeof(ParticleEmissionBuffer::Data) * p_particles->emission_buffer->particle_count, p_particles->emission_buffer); p_particles->emission_buffer->particle_count = 0; } p_particles->clear = false; - RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams), &frame_params, true); + RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams), &frame_params); ParticlesMaterialData *m = (ParticlesMaterialData *)material_get_data(p_particles->process_material, SHADER_TYPE_PARTICLES); if (!m) { @@ -4509,7 +4555,7 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -4563,7 +4609,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 & RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); @@ -4575,7 +4621,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 & RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -4682,12 +4728,12 @@ void RendererStorageRD::update_particles() { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } - particles->instance_dependency.instance_notify_changed(true, false); //make sure shadows are updated + particles->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } } @@ -4817,6 +4863,10 @@ Variant RendererStorageRD::ParticlesShaderData::get_default_parameter(const Stri return Variant(); } +RS::ShaderNativeSourceCode RendererStorageRD::ParticlesShaderData::get_native_source_code() const { + return base_singleton->particles_shader.shader.version_get_native_source_code(version); +} + RendererStorageRD::ParticlesShaderData::ParticlesShaderData() { valid = false; } @@ -4986,7 +5036,7 @@ void RendererStorageRD::particles_collision_set_collision_type(RID p_particles_c particles_collision->heightfield_texture = RID(); } particles_collision->type = p_type; - particles_collision->instance_dependency.instance_notify_changed(true, false); + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } void RendererStorageRD::particles_collision_set_cull_mask(RID p_particles_collision, uint32_t p_cull_mask) { @@ -5000,7 +5050,7 @@ void RendererStorageRD::particles_collision_set_sphere_radius(RID p_particles_co ERR_FAIL_COND(!particles_collision); particles_collision->radius = p_radius; - particles_collision->instance_dependency.instance_notify_changed(true, false); + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } void RendererStorageRD::particles_collision_set_box_extents(RID p_particles_collision, const Vector3 &p_extents) { @@ -5008,7 +5058,7 @@ void RendererStorageRD::particles_collision_set_box_extents(RID p_particles_coll ERR_FAIL_COND(!particles_collision); particles_collision->extents = p_extents; - particles_collision->instance_dependency.instance_notify_changed(true, false); + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } void RendererStorageRD::particles_collision_set_attractor_strength(RID p_particles_collision, float p_strength) { @@ -5042,7 +5092,7 @@ void RendererStorageRD::particles_collision_set_field_texture(RID p_particles_co void RendererStorageRD::particles_collision_height_field_update(RID p_particles_collision) { ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); ERR_FAIL_COND(!particles_collision); - particles_collision->instance_dependency.instance_notify_changed(true, false); + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } void RendererStorageRD::particles_collision_set_height_field_resolution(RID p_particles_collision, RS::ParticlesCollisionHeightfieldResolution p_resolution) { @@ -5096,6 +5146,22 @@ bool RendererStorageRD::particles_collision_is_heightfield(RID p_particles_colli return particles_collision->type == RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE; } +RID RendererStorageRD::particles_collision_instance_create(RID p_collision) { + ParticlesCollisionInstance pci; + pci.collision = p_collision; + return particles_collision_instance_owner.make_rid(pci); +} +void RendererStorageRD::particles_collision_instance_set_transform(RID p_collision_instance, const Transform &p_transform) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.getornull(p_collision_instance); + ERR_FAIL_COND(!pci); + pci->transform = p_transform; +} +void RendererStorageRD::particles_collision_instance_set_active(RID p_collision_instance, bool p_active) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.getornull(p_collision_instance); + ERR_FAIL_COND(!pci); + pci->active = p_active; +} + /* SKELETON API */ RID RendererStorageRD::skeleton_create() { @@ -5149,6 +5215,8 @@ void RendererStorageRD::skeleton_allocate(RID p_skeleton, int p_bones, bool p_2d skeleton->uniform_set_mi = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SKELETON); } } + + skeleton->dependency.changed_notify(DEPENDENCY_CHANGED_SKELETON_DATA); } int RendererStorageRD::skeleton_get_bone_count(RID p_skeleton) const { @@ -5264,12 +5332,13 @@ void RendererStorageRD::_update_dirty_skeletons() { Skeleton *skeleton = skeleton_dirty_list; if (skeleton->size) { - RD::get_singleton()->buffer_update(skeleton->buffer, 0, skeleton->data.size() * sizeof(float), skeleton->data.ptr(), false); + RD::get_singleton()->buffer_update(skeleton->buffer, 0, skeleton->data.size() * sizeof(float), skeleton->data.ptr()); } skeleton_dirty_list = skeleton->dirty_list; - skeleton->instance_dependency.instance_notify_changed(true, false); + skeleton->dependency.changed_notify(DEPENDENCY_CHANGED_SKELETON_BONES); + skeleton->version++; skeleton->dirty = false; @@ -5290,17 +5359,20 @@ RID RendererStorageRD::light_create(RS::LightType p_type) { light.param[RS::LIGHT_PARAM_SPECULAR] = 0.5; light.param[RS::LIGHT_PARAM_RANGE] = 1.0; light.param[RS::LIGHT_PARAM_SIZE] = 0.0; + light.param[RS::LIGHT_PARAM_ATTENUATION] = 1.0; light.param[RS::LIGHT_PARAM_SPOT_ANGLE] = 45; + light.param[RS::LIGHT_PARAM_SPOT_ATTENUATION] = 1.0; light.param[RS::LIGHT_PARAM_SHADOW_MAX_DISTANCE] = 0; light.param[RS::LIGHT_PARAM_SHADOW_SPLIT_1_OFFSET] = 0.1; light.param[RS::LIGHT_PARAM_SHADOW_SPLIT_2_OFFSET] = 0.3; light.param[RS::LIGHT_PARAM_SHADOW_SPLIT_3_OFFSET] = 0.6; light.param[RS::LIGHT_PARAM_SHADOW_FADE_START] = 0.8; - light.param[RS::LIGHT_PARAM_SHADOW_BIAS] = 0.02; light.param[RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS] = 1.0; + light.param[RS::LIGHT_PARAM_SHADOW_BIAS] = 0.02; + light.param[RS::LIGHT_PARAM_SHADOW_BLUR] = 0; light.param[RS::LIGHT_PARAM_SHADOW_PANCAKE_SIZE] = 20.0; + light.param[RS::LIGHT_PARAM_SHADOW_VOLUMETRIC_FOG_FADE] = 0.1; light.param[RS::LIGHT_PARAM_TRANSMITTANCE_BIAS] = 0.05; - light.param[RS::LIGHT_PARAM_SHADOW_VOLUMETRIC_FOG_FADE] = 1.0; return light_owner.make_rid(light); } @@ -5328,7 +5400,7 @@ void RendererStorageRD::light_set_param(RID p_light, RS::LightParam p_param, flo case RS::LIGHT_PARAM_SHADOW_PANCAKE_SIZE: case RS::LIGHT_PARAM_SHADOW_BIAS: { light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } break; default: { } @@ -5343,7 +5415,7 @@ void RendererStorageRD::light_set_shadow(RID p_light, bool p_enabled) { light->shadow = p_enabled; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } void RendererStorageRD::light_set_shadow_color(RID p_light, const Color &p_color) { @@ -5385,7 +5457,7 @@ void RendererStorageRD::light_set_cull_mask(RID p_light, uint32_t p_mask) { light->cull_mask = p_mask; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } void RendererStorageRD::light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) { @@ -5395,7 +5467,7 @@ void RendererStorageRD::light_set_reverse_cull_face_mode(RID p_light, bool p_ena light->reverse_cull = p_enabled; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } void RendererStorageRD::light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) { @@ -5405,7 +5477,7 @@ void RendererStorageRD::light_set_bake_mode(RID p_light, RS::LightBakeMode p_bak light->bake_mode = p_bake_mode; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } void RendererStorageRD::light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) { @@ -5415,7 +5487,7 @@ void RendererStorageRD::light_set_max_sdfgi_cascade(RID p_light, uint32_t p_casc light->max_sdfgi_cascade = p_cascade; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } void RendererStorageRD::light_omni_set_shadow_mode(RID p_light, RS::LightOmniShadowMode p_mode) { @@ -5425,7 +5497,7 @@ void RendererStorageRD::light_omni_set_shadow_mode(RID p_light, RS::LightOmniSha light->omni_shadow_mode = p_mode; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } RS::LightOmniShadowMode RendererStorageRD::light_omni_get_shadow_mode(RID p_light) { @@ -5441,7 +5513,7 @@ void RendererStorageRD::light_directional_set_shadow_mode(RID p_light, RS::Light light->directional_shadow_mode = p_mode; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } void RendererStorageRD::light_directional_set_blend_splits(RID p_light, bool p_enable) { @@ -5450,7 +5522,7 @@ void RendererStorageRD::light_directional_set_blend_splits(RID p_light, bool p_e light->directional_blend_splits = p_enable; light->version++; - light->instance_dependency.instance_notify_changed(true, false); + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); } bool RendererStorageRD::light_directional_get_blend_splits(RID p_light) const { @@ -5549,7 +5621,7 @@ void RendererStorageRD::reflection_probe_set_update_mode(RID p_probe, RS::Reflec ERR_FAIL_COND(!reflection_probe); reflection_probe->update_mode = p_mode; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } void RendererStorageRD::reflection_probe_set_intensity(RID p_probe, float p_intensity) { @@ -5586,7 +5658,7 @@ void RendererStorageRD::reflection_probe_set_max_distance(RID p_probe, float p_d reflection_probe->max_distance = p_distance; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } void RendererStorageRD::reflection_probe_set_extents(RID p_probe, const Vector3 &p_extents) { @@ -5597,7 +5669,7 @@ void RendererStorageRD::reflection_probe_set_extents(RID p_probe, const Vector3 return; } reflection_probe->extents = p_extents; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } void RendererStorageRD::reflection_probe_set_origin_offset(RID p_probe, const Vector3 &p_offset) { @@ -5605,7 +5677,7 @@ void RendererStorageRD::reflection_probe_set_origin_offset(RID p_probe, const Ve ERR_FAIL_COND(!reflection_probe); reflection_probe->origin_offset = p_offset; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } void RendererStorageRD::reflection_probe_set_as_interior(RID p_probe, bool p_enable) { @@ -5613,7 +5685,7 @@ void RendererStorageRD::reflection_probe_set_as_interior(RID p_probe, bool p_ena ERR_FAIL_COND(!reflection_probe); reflection_probe->interior = p_enable; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } void RendererStorageRD::reflection_probe_set_enable_box_projection(RID p_probe, bool p_enable) { @@ -5628,7 +5700,7 @@ void RendererStorageRD::reflection_probe_set_enable_shadows(RID p_probe, bool p_ ERR_FAIL_COND(!reflection_probe); reflection_probe->enable_shadows = p_enable; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } void RendererStorageRD::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers) { @@ -5636,7 +5708,7 @@ void RendererStorageRD::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_l ERR_FAIL_COND(!reflection_probe); reflection_probe->cull_mask = p_layers; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } void RendererStorageRD::reflection_probe_set_resolution(RID p_probe, int p_resolution) { @@ -5653,7 +5725,7 @@ void RendererStorageRD::reflection_probe_set_lod_threshold(RID p_probe, float p_ reflection_probe->lod_threshold = p_ratio; - reflection_probe->instance_dependency.instance_notify_changed(true, false); + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); } AABB RendererStorageRD::reflection_probe_get_aabb(RID p_probe) const { @@ -5771,7 +5843,7 @@ void RendererStorageRD::decal_set_extents(RID p_decal, const Vector3 &p_extents) Decal *decal = decal_owner.getornull(p_decal); ERR_FAIL_COND(!decal); decal->extents = p_extents; - decal->instance_dependency.instance_notify_changed(true, false); + decal->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } void RendererStorageRD::decal_set_texture(RID p_decal, RS::DecalTexture p_type, RID p_texture) { @@ -5795,7 +5867,7 @@ void RendererStorageRD::decal_set_texture(RID p_decal, RS::DecalTexture p_type, texture_add_to_decal_atlas(decal->textures[p_type]); } - decal->instance_dependency.instance_notify_changed(false, true); + decal->dependency.changed_notify(DEPENDENCY_CHANGED_DECAL); } void RendererStorageRD::decal_set_emission_energy(RID p_decal, float p_energy) { @@ -5820,7 +5892,7 @@ void RendererStorageRD::decal_set_cull_mask(RID p_decal, uint32_t p_layers) { Decal *decal = decal_owner.getornull(p_decal); ERR_FAIL_COND(!decal); decal->cull_mask = p_layers; - decal->instance_dependency.instance_notify_changed(true, false); + decal->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } void RendererStorageRD::decal_set_distance_fade(RID p_decal, bool p_enabled, float p_begin, float p_length) { @@ -5977,7 +6049,7 @@ void RendererStorageRD::gi_probe_allocate(RID p_gi_probe, const Transform &p_to_ gi_probe->version++; gi_probe->data_version++; - gi_probe->instance_dependency.instance_notify_changed(true, false); + gi_probe->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); } AABB RendererStorageRD::gi_probe_get_bounds(RID p_gi_probe) const { @@ -6908,7 +6980,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[1], 0); //fill [0] RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); /* Process */ @@ -6924,7 +6996,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); push_constant.stride = stride; RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); stride /= 2; swap = !swap; RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -6935,7 +7007,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[shrink ? RenderTargetSDF::SHADER_STORE_SHRINK : RenderTargetSDF::SHADER_STORE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); RD::get_singleton()->compute_list_end(); } @@ -7055,45 +7127,45 @@ void RendererStorageRD::render_target_set_backbuffer_uniform_set(RID p_render_ta rt->backbuffer_uniform_set = p_uniform_set; } -void RendererStorageRD::base_update_dependency(RID p_base, InstanceBaseDependency *p_instance) { +void RendererStorageRD::base_update_dependency(RID p_base, DependencyTracker *p_instance) { if (mesh_owner.owns(p_base)) { Mesh *mesh = mesh_owner.getornull(p_base); - p_instance->update_dependency(&mesh->instance_dependency); + p_instance->update_dependency(&mesh->dependency); } else if (multimesh_owner.owns(p_base)) { MultiMesh *multimesh = multimesh_owner.getornull(p_base); - p_instance->update_dependency(&multimesh->instance_dependency); + p_instance->update_dependency(&multimesh->dependency); if (multimesh->mesh.is_valid()) { base_update_dependency(multimesh->mesh, p_instance); } } else if (reflection_probe_owner.owns(p_base)) { ReflectionProbe *rp = reflection_probe_owner.getornull(p_base); - p_instance->update_dependency(&rp->instance_dependency); + p_instance->update_dependency(&rp->dependency); } else if (decal_owner.owns(p_base)) { Decal *decal = decal_owner.getornull(p_base); - p_instance->update_dependency(&decal->instance_dependency); + p_instance->update_dependency(&decal->dependency); } else if (gi_probe_owner.owns(p_base)) { GIProbe *gip = gi_probe_owner.getornull(p_base); - p_instance->update_dependency(&gip->instance_dependency); + p_instance->update_dependency(&gip->dependency); } else if (lightmap_owner.owns(p_base)) { Lightmap *lm = lightmap_owner.getornull(p_base); - p_instance->update_dependency(&lm->instance_dependency); + p_instance->update_dependency(&lm->dependency); } else if (light_owner.owns(p_base)) { Light *l = light_owner.getornull(p_base); - p_instance->update_dependency(&l->instance_dependency); + p_instance->update_dependency(&l->dependency); } else if (particles_owner.owns(p_base)) { Particles *p = particles_owner.getornull(p_base); - p_instance->update_dependency(&p->instance_dependency); + p_instance->update_dependency(&p->dependency); } else if (particles_collision_owner.owns(p_base)) { ParticlesCollision *pc = particles_collision_owner.getornull(p_base); - p_instance->update_dependency(&pc->instance_dependency); + p_instance->update_dependency(&pc->dependency); } } -void RendererStorageRD::skeleton_update_dependency(RID p_skeleton, InstanceBaseDependency *p_instance) { +void RendererStorageRD::skeleton_update_dependency(RID p_skeleton, DependencyTracker *p_instance) { Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); ERR_FAIL_COND(!skeleton); - p_instance->update_dependency(&skeleton->instance_dependency); + p_instance->update_dependency(&skeleton->dependency); } RS::InstanceType RendererStorageRD::get_base_type(RID p_rid) const { @@ -7299,6 +7371,7 @@ void RendererStorageRD::_update_decal_atlas() { tformat.shareable_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB); decal_atlas.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + RD::get_singleton()->texture_clear(decal_atlas.texture, Color(0, 0, 0, 0), 0, decal_atlas.mipmaps, 0, 1); { //create the framebuffer @@ -7353,7 +7426,7 @@ void RendererStorageRD::_update_decal_atlas() { prev_texture = mm.texture; } } else { - RD::get_singleton()->texture_clear(mm.texture, clear_color, 0, 1, 0, 1, false); + RD::get_singleton()->texture_clear(mm.texture, clear_color, 0, 1, 0, 1); } } } @@ -8114,21 +8187,31 @@ bool RendererStorageRD::free(RID p_rid) { _update_queued_materials(); } material_set_shader(p_rid, RID()); //clean up shader - material->instance_dependency.instance_notify_deleted(p_rid); + material->dependency.deleted_notify(p_rid); + material_owner.free(p_rid); } else if (mesh_owner.owns(p_rid)) { mesh_clear(p_rid); + mesh_set_shadow_mesh(p_rid, RID()); Mesh *mesh = mesh_owner.getornull(p_rid); - mesh->instance_dependency.instance_notify_deleted(p_rid); + mesh->dependency.deleted_notify(p_rid); if (mesh->instances.size()) { ERR_PRINT("deleting mesh with active instances"); } + if (mesh->shadow_owners.size()) { + for (Set<Mesh *>::Element *E = mesh->shadow_owners.front(); E; E = E->next()) { + Mesh *shadow_owner = E->get(); + shadow_owner->shadow_mesh = RID(); + shadow_owner->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + } + } mesh_owner.free(p_rid); } else if (mesh_instance_owner.owns(p_rid)) { MeshInstance *mi = mesh_instance_owner.getornull(p_rid); _mesh_instance_clear(mi); mi->mesh->instances.erase(mi->I); mi->I = nullptr; + mesh_instance_owner.free(p_rid); memdelete(mi); @@ -8136,17 +8219,17 @@ bool RendererStorageRD::free(RID p_rid) { _update_dirty_multimeshes(); multimesh_allocate(p_rid, 0, RS::MULTIMESH_TRANSFORM_2D); MultiMesh *multimesh = multimesh_owner.getornull(p_rid); - multimesh->instance_dependency.instance_notify_deleted(p_rid); + multimesh->dependency.deleted_notify(p_rid); multimesh_owner.free(p_rid); } else if (skeleton_owner.owns(p_rid)) { _update_dirty_skeletons(); skeleton_allocate(p_rid, 0); Skeleton *skeleton = skeleton_owner.getornull(p_rid); - skeleton->instance_dependency.instance_notify_deleted(p_rid); + skeleton->dependency.deleted_notify(p_rid); skeleton_owner.free(p_rid); } else if (reflection_probe_owner.owns(p_rid)) { ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_rid); - reflection_probe->instance_dependency.instance_notify_deleted(p_rid); + reflection_probe->dependency.deleted_notify(p_rid); reflection_probe_owner.free(p_rid); } else if (decal_owner.owns(p_rid)) { Decal *decal = decal_owner.getornull(p_rid); @@ -8155,30 +8238,30 @@ bool RendererStorageRD::free(RID p_rid) { texture_remove_from_decal_atlas(decal->textures[i]); } } - decal->instance_dependency.instance_notify_deleted(p_rid); + decal->dependency.deleted_notify(p_rid); decal_owner.free(p_rid); } else if (gi_probe_owner.owns(p_rid)) { gi_probe_allocate(p_rid, Transform(), AABB(), Vector3i(), Vector<uint8_t>(), Vector<uint8_t>(), Vector<uint8_t>(), Vector<int>()); //deallocate GIProbe *gi_probe = gi_probe_owner.getornull(p_rid); - gi_probe->instance_dependency.instance_notify_deleted(p_rid); + gi_probe->dependency.deleted_notify(p_rid); gi_probe_owner.free(p_rid); } else if (lightmap_owner.owns(p_rid)) { lightmap_set_textures(p_rid, RID(), false); Lightmap *lightmap = lightmap_owner.getornull(p_rid); - lightmap->instance_dependency.instance_notify_deleted(p_rid); + lightmap->dependency.deleted_notify(p_rid); lightmap_owner.free(p_rid); } else if (light_owner.owns(p_rid)) { light_set_projector(p_rid, RID()); //clear projector // delete the texture Light *light = light_owner.getornull(p_rid); - light->instance_dependency.instance_notify_deleted(p_rid); + light->dependency.deleted_notify(p_rid); light_owner.free(p_rid); } else if (particles_owner.owns(p_rid)) { Particles *particles = particles_owner.getornull(p_rid); _particles_free_data(particles); - particles->instance_dependency.instance_notify_deleted(p_rid); + particles->dependency.deleted_notify(p_rid); particles_owner.free(p_rid); } else if (particles_collision_owner.owns(p_rid)) { ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_rid); @@ -8186,8 +8269,10 @@ bool RendererStorageRD::free(RID p_rid) { if (particles_collision->heightfield_texture.is_valid()) { RD::get_singleton()->free(particles_collision->heightfield_texture); } - particles_collision->instance_dependency.instance_notify_deleted(p_rid); + particles_collision->dependency.deleted_notify(p_rid); particles_collision_owner.free(p_rid); + } else if (particles_collision_instance_owner.owns(p_rid)) { + particles_collision_instance_owner.free(p_rid); } else if (render_target_owner.owns(p_rid)) { RenderTarget *rt = render_target_owner.getornull(p_rid); @@ -8212,11 +8297,11 @@ EffectsRD *RendererStorageRD::get_effects() { } void RendererStorageRD::capture_timestamps_begin() { - RD::get_singleton()->capture_timestamp("Frame Begin", false); + RD::get_singleton()->capture_timestamp("Frame Begin"); } void RendererStorageRD::capture_timestamp(const String &p_name) { - RD::get_singleton()->capture_timestamp(p_name, true); + RD::get_singleton()->capture_timestamp(p_name); } uint32_t RendererStorageRD::get_captured_timestamps_count() const { @@ -8736,7 +8821,7 @@ RendererStorageRD::RendererStorageRD() { actions.renames["RESTART_VELOCITY"] = "restart_velocity"; actions.renames["RESTART_COLOR"] = "restart_color"; actions.renames["RESTART_CUSTOM"] = "restart_custom"; - actions.renames["emit_particle"] = "emit_particle"; + actions.renames["emit_subparticle"] = "emit_subparticle"; actions.renames["COLLIDED"] = "collided"; actions.renames["COLLISION_NORMAL"] = "collision_normal"; actions.renames["COLLISION_DEPTH"] = "collision_depth"; diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h index 6d1587185e..48d43568c4 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.h +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ -95,6 +95,21 @@ public: p_array[11] = 0; } + static _FORCE_INLINE_ void store_transform_transposed_3x4(const Transform &p_mtx, float *p_array) { + p_array[0] = p_mtx.basis.elements[0][0]; + p_array[1] = p_mtx.basis.elements[0][1]; + p_array[2] = p_mtx.basis.elements[0][2]; + p_array[3] = p_mtx.origin.x; + p_array[4] = p_mtx.basis.elements[1][0]; + p_array[5] = p_mtx.basis.elements[1][1]; + p_array[6] = p_mtx.basis.elements[1][2]; + p_array[7] = p_mtx.origin.y; + p_array[8] = p_mtx.basis.elements[2][0]; + p_array[9] = p_mtx.basis.elements[2][1]; + p_array[10] = p_mtx.basis.elements[2][2]; + p_array[11] = p_mtx.origin.z; + } + static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) { for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { @@ -127,6 +142,8 @@ public: virtual bool is_animated() const = 0; virtual bool casts_shadows() const = 0; virtual Variant get_default_parameter(const StringName &p_parameter) const = 0; + virtual RS::ShaderNativeSourceCode get_native_source_code() const { return RS::ShaderNativeSourceCode(); } + virtual ~ShaderData() {} }; @@ -360,6 +377,7 @@ private: Shader *shader; //shortcut to shader data and type ShaderType shader_type; + uint32_t shader_id = 0; bool update_requested; bool uniform_dirty; bool texture_dirty; @@ -367,7 +385,7 @@ private: Map<StringName, Variant> params; int32_t priority; RID next_pass; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; MaterialDataRequestFunction material_data_request_func[SHADER_TYPE_MAX]; @@ -460,7 +478,10 @@ private: List<MeshInstance *> instances; - RendererStorage::InstanceDependency instance_dependency; + RID shadow_mesh; + Set<Mesh *> shadow_owners; + + Dependency dependency; }; mutable RID_Owner<Mesh> mesh_owner; @@ -563,7 +584,7 @@ private: bool dirty = false; MultiMesh *dirty_list = nullptr; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; mutable RID_Owner<MultiMesh> multimesh_owner; @@ -734,7 +755,7 @@ private: ParticleEmissionBuffer *emission_buffer = nullptr; RID emission_storage_buffer; - Set<RendererSceneRender::InstanceBase *> collisions; + Set<RID> collisions; Particles() : inactive(true), @@ -761,7 +782,7 @@ private: clear(true) { } - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; ParticlesFrameParams frame_params; }; @@ -839,6 +860,8 @@ private: virtual bool is_animated() const; virtual bool casts_shadows() const; virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + ParticlesShaderData(); virtual ~ParticlesShaderData(); }; @@ -889,11 +912,19 @@ private: RS::ParticlesCollisionHeightfieldResolution heightfield_resolution = RS::PARTICLES_COLLISION_HEIGHTFIELD_RESOLUTION_1024; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; mutable RID_Owner<ParticlesCollision> particles_collision_owner; + struct ParticlesCollisionInstance { + RID collision; + Transform transform; + bool active = false; + }; + + mutable RID_Owner<ParticlesCollisionInstance> particles_collision_instance_owner; + /* Skeleton */ struct Skeleton { @@ -911,7 +942,7 @@ private: uint64_t version = 1; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; mutable RID_Owner<Skeleton> skeleton_owner; @@ -943,7 +974,7 @@ private: bool directional_sky_only = false; uint64_t version = 0; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; mutable RID_Owner<Light> light_owner; @@ -966,7 +997,7 @@ private: uint32_t cull_mask = (1 << 20) - 1; float lod_threshold = 0.01; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; mutable RID_Owner<ReflectionProbe> reflection_probe_owner; @@ -987,7 +1018,7 @@ private: float distance_fade_length = 1; float normal_fade = 0.0; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; mutable RID_Owner<Decal> decal_owner; @@ -1025,7 +1056,7 @@ private: uint32_t version = 1; uint32_t data_version = 1; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; GiprobeSdfShaderRD giprobe_sdf_shader; @@ -1054,7 +1085,7 @@ private: int32_t over = EMPTY_LEAF, under = EMPTY_LEAF; }; - RendererStorage::InstanceDependency instance_dependency; + Dependency dependency; }; bool using_lightmap_array; //high end uses this @@ -1330,6 +1361,8 @@ public: Variant shader_get_param_default(RID p_shader, const StringName &p_param) const; void shader_set_data_request_function(ShaderType p_shader_type, ShaderDataRequestFunction p_function); + virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const; + /* COMMON MATERIAL API */ RID material_create(); @@ -1347,11 +1380,16 @@ public: void material_get_instance_shader_parameters(RID p_material, List<InstanceShaderParam> *r_parameters); - void material_update_dependency(RID p_material, InstanceBaseDependency *p_instance); + void material_update_dependency(RID p_material, DependencyTracker *p_instance); void material_force_update_textures(RID p_material, ShaderType p_shader_type); void material_set_data_request_function(ShaderType p_shader_type, MaterialDataRequestFunction p_function); + _FORCE_INLINE_ uint32_t material_get_shader_id(RID p_material) { + Material *material = material_owner.getornull(p_material); + return material->shader_id; + } + _FORCE_INLINE_ MaterialData *material_get_data(RID p_material, ShaderType p_shader_type) { Material *material = material_owner.getornull(p_material); if (!material || material->shader_type != p_shader_type) { @@ -1388,6 +1426,7 @@ public: virtual AABB mesh_get_custom_aabb(RID p_mesh) const; virtual AABB mesh_get_aabb(RID p_mesh, RID p_skeleton = RID()); + virtual void mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh); virtual void mesh_clear(RID p_mesh); @@ -1426,6 +1465,13 @@ public: return mesh->surfaces[p_surface_index]; } + _FORCE_INLINE_ RID mesh_get_shadow_mesh(RID p_mesh) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, RID()); + + return mesh->shadow_mesh; + } + _FORCE_INLINE_ RS::PrimitiveType mesh_surface_get_primitive(void *p_surface) { Mesh::Surface *surface = reinterpret_cast<Mesh::Surface *>(p_surface); return surface->primitive; @@ -1436,13 +1482,7 @@ public: return s->lod_count > 0; } - _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface) const { - Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); - - return s->index_array; - } - - _FORCE_INLINE_ RID mesh_surface_get_index_array_with_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { + _FORCE_INLINE_ uint32_t mesh_surface_get_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); int32_t current_lod = -1; @@ -1454,9 +1494,19 @@ public: current_lod = i; } if (current_lod == -1) { + return 0; + } else { + return current_lod + 1; + } + } + + _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface, uint32_t p_lod) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + if (p_lod == 0) { return s->index_array; } else { - return s->lods[current_lod].index_array; + return s->lods[p_lod - 1].index_array; } } @@ -1664,6 +1714,10 @@ public: void skeleton_bone_set_transform_2d(RID p_skeleton, int p_bone, const Transform2D &p_transform); Transform2D skeleton_bone_get_transform_2d(RID p_skeleton, int p_bone) const; + _FORCE_INLINE_ bool skeleton_is_valid(RID p_skeleton) { + return skeleton_owner.getornull(p_skeleton) != nullptr; + } + _FORCE_INLINE_ RID skeleton_get_3d_uniform_set(RID p_skeleton, RID p_shader, uint32_t p_set) const { Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); ERR_FAIL_COND_V(!skeleton, RID()); @@ -1827,8 +1881,8 @@ public: Color reflection_probe_get_ambient_color(RID p_probe) const; float reflection_probe_get_ambient_color_energy(RID p_probe) const; - void base_update_dependency(RID p_base, InstanceBaseDependency *p_instance); - void skeleton_update_dependency(RID p_skeleton, InstanceBaseDependency *p_instance); + void base_update_dependency(RID p_base, DependencyTracker *p_instance); + void skeleton_update_dependency(RID p_skeleton, DependencyTracker *p_instance); /* DECAL API */ @@ -1977,7 +2031,11 @@ public: _FORCE_INLINE_ float lightmap_get_probe_capture_update_speed() const { return lightmap_probe_capture_update_speed; } - + _FORCE_INLINE_ RID lightmap_get_texture(RID p_lightmap) const { + const Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, RID()); + return lm->light_texture; + } _FORCE_INLINE_ int32_t lightmap_get_array_index(RID p_lightmap) const { ERR_FAIL_COND_V(!using_lightmap_array, -1); //only for arrays const Lightmap *lm = lightmap_owner.getornull(p_lightmap); @@ -2078,8 +2136,8 @@ public: return particles->particles_transforms_buffer_uniform_set; } - virtual void particles_add_collision(RID p_particles, InstanceBaseDependency *p_instance); - virtual void particles_remove_collision(RID p_particles, InstanceBaseDependency *p_instance); + virtual void particles_add_collision(RID p_particles, RID p_particles_collision_instance); + virtual void particles_remove_collision(RID p_particles, RID p_particles_collision_instance); /* PARTICLES COLLISION */ @@ -2099,6 +2157,11 @@ public: virtual bool particles_collision_is_heightfield(RID p_particles_collision) const; RID particles_collision_get_heightfield_framebuffer(RID p_particles_collision) const; + //used from 2D and 3D + virtual RID particles_collision_instance_create(RID p_collision); + virtual void particles_collision_instance_set_transform(RID p_collision_instance, const Transform &p_transform); + virtual void particles_collision_instance_set_active(RID p_collision_instance, bool p_active); + /* GLOBAL VARIABLES API */ virtual void global_variable_add(const StringName &p_name, RS::GlobalVariableType p_type, const Variant &p_value); diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index d1f07a354f..e4a39ff813 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -301,6 +301,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { builder.append(compute_codev.get_data()); // version info (if exists) builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); builder.append(general_defines.get_data()); builder.append(variant_defines[p_variant].get_data()); @@ -351,6 +352,127 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { } } +RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_version) { + Version *version = version_owner.getornull(p_version); + RS::ShaderNativeSourceCode source_code; + ERR_FAIL_COND_V(!version, source_code); + + source_code.versions.resize(variant_defines.size()); + + for (int i = 0; i < source_code.versions.size(); i++) { + if (!is_compute) { + //vertex stage + + StringBuilder builder; + + builder.append(vertex_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(general_defines.get_data()); + builder.append(variant_defines[i].get_data()); + + for (int j = 0; j < version->custom_defines.size(); j++) { + builder.append(version->custom_defines[j].get_data()); + } + + builder.append(vertex_code0.get_data()); //first part of vertex + + builder.append(version->uniforms.get_data()); //uniforms (same for vertex and fragment) + + builder.append(vertex_code1.get_data()); //second part of vertex + + builder.append(version->vertex_globals.get_data()); // vertex globals + + builder.append(vertex_code2.get_data()); //third part of vertex + + builder.append(version->vertex_code.get_data()); // code + + builder.append(vertex_code3.get_data()); //fourth of vertex + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "vertex"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + + if (!is_compute) { + //fragment stage + + StringBuilder builder; + + builder.append(fragment_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(general_defines.get_data()); + builder.append(variant_defines[i].get_data()); + for (int j = 0; j < version->custom_defines.size(); j++) { + builder.append(version->custom_defines[j].get_data()); + } + + builder.append(fragment_code0.get_data()); //first part of fragment + + builder.append(version->uniforms.get_data()); //uniforms (same for fragment and fragment) + + builder.append(fragment_code1.get_data()); //first part of fragment + + builder.append(version->fragment_globals.get_data()); // fragment globals + + builder.append(fragment_code2.get_data()); //third part of fragment + + builder.append(version->fragment_light.get_data()); // fragment light + + builder.append(fragment_code3.get_data()); //fourth part of fragment + + builder.append(version->fragment_code.get_data()); // fragment code + + builder.append(fragment_code4.get_data()); //fourth part of fragment + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "fragment"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + + if (is_compute) { + //compute stage + + StringBuilder builder; + + builder.append(compute_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); + builder.append(general_defines.get_data()); + builder.append(variant_defines[i].get_data()); + + for (int j = 0; j < version->custom_defines.size(); j++) { + builder.append(version->custom_defines[j].get_data()); + } + + builder.append(compute_code0.get_data()); //first part of compute + + builder.append(version->uniforms.get_data()); //uniforms (same for compute and fragment) + + builder.append(compute_code1.get_data()); //second part of compute + + builder.append(version->compute_globals.get_data()); // compute globals + + builder.append(compute_code2.get_data()); //third part of compute + + builder.append(version->compute_code.get_data()); // code + + builder.append(compute_code3.get_data()); //fourth of compute + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "compute"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + } + + return source_code; +} + void ShaderRD::_compile_version(Version *p_version) { _clear_version(p_version); @@ -360,7 +482,7 @@ void ShaderRD::_compile_version(Version *p_version) { p_version->variants = memnew_arr(RID, variant_defines.size()); #if 1 - RendererCompositorRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version); + RendererThreadPool::singleton->thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version); #else for (int i = 0; i < variant_defines.size(); i++) { _compile_variant(i, p_version); @@ -475,6 +597,22 @@ bool ShaderRD::is_variant_enabled(int p_variant) const { return variants_enabled[p_variant]; } +ShaderRD::ShaderRD() { + // Do not feel forced to use this, in most cases it makes little to no difference. + bool use_32_threads = false; + if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") { + use_32_threads = true; + } + String base_compute_define_text; + if (use_32_threads) { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 32\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 4\n"; + } else { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 64\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 8\n"; + } + + base_compute_defines = base_compute_define_text.ascii(); +} + void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines) { ERR_FAIL_COND(variant_defines.size()); ERR_FAIL_COND(p_variant_defines.size() == 0); diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index a80d08050a..e0f4dcf2d0 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -36,6 +36,7 @@ #include "core/templates/map.h" #include "core/templates/rid_owner.h" #include "core/variant/variant.h" +#include "servers/rendering_server.h" #include <stdio.h> /** @@ -98,8 +99,10 @@ class ShaderRD { const char *name; + CharString base_compute_defines; + protected: - ShaderRD() {} + ShaderRD(); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); public: @@ -133,6 +136,8 @@ public: void set_variant_enabled(int p_variant, bool p_enabled); bool is_variant_enabled(int p_variant) const; + RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version); + void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = ""); virtual ~ShaderRD(); }; diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index deaa9668df..c192574ff2 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -39,8 +39,10 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("sdfgi_debug.glsl") env.RD_GLSL("sdfgi_debug_probes.glsl") env.RD_GLSL("volumetric_fog.glsl") - env.RD_GLSL("shadow_reduce.glsl") env.RD_GLSL("particles.glsl") env.RD_GLSL("particles_copy.glsl") env.RD_GLSL("sort.glsl") env.RD_GLSL("skeleton.glsl") + env.RD_GLSL("cluster_render.glsl") + env.RD_GLSL("cluster_store.glsl") + env.RD_GLSL("cluster_debug.glsl") diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl index 2a7cae3b4c..3b39edc70e 100644 --- a/servers/rendering/renderer_rd/shaders/canvas.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas.glsl @@ -396,7 +396,7 @@ vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv vec4 shadow_color = unpackUnorm4x8(light_array.data[light_base].shadow_color); #ifdef LIGHT_SHADER_CODE_USED - shadow_color *= shadow_modulate; + shadow_color.rgb *= shadow_modulate; #endif shadow_color.a *= light_color.a; //respect light alpha @@ -497,7 +497,7 @@ void main() { vec2 shadow_vertex = vertex; { - float normal_depth = 1.0; + float normal_map_depth = 1.0; #if defined(NORMAL_MAP_USED) vec3 normal_map = vec3(0.0, 0.0, 1.0); @@ -511,7 +511,7 @@ FRAGMENT_SHADER_CODE /* clang-format on */ #if defined(NORMAL_MAP_USED) - normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_depth); + normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_map_depth); #endif } @@ -546,7 +546,7 @@ FRAGMENT_SHADER_CODE #ifdef LIGHT_SHADER_CODE_USED vec4 shadow_modulate = vec4(1.0); - light_color = light_compute(light_vertex, direction, normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, color, uv, true); + light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, true); #else if (normal_used) { @@ -563,7 +563,7 @@ FRAGMENT_SHADER_CODE light_color = light_shadow_compute(light_base, light_color, shadow_uv #ifdef LIGHT_SHADER_CODE_USED , - shadow_modulate + shadow_modulate.rgb #endif ); } @@ -605,7 +605,7 @@ FRAGMENT_SHADER_CODE vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height); light_color.rgb *= light_base_color.rgb; - light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, color, uv, false); + light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, false); #else light_color.rgb *= light_base_color.rgb * light_base_color.a; @@ -659,7 +659,7 @@ FRAGMENT_SHADER_CODE light_color = light_shadow_compute(light_base, light_color, shadow_uv #ifdef LIGHT_SHADER_CODE_USED , - shadow_modulate + shadow_modulate.rgb #endif ); } diff --git a/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl index e723468dd8..3a4bf4da07 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl @@ -6,12 +6,18 @@ struct LightData { //this structure needs to be as packed as possible vec3 position; float inv_radius; + vec3 direction; float size; - uint attenuation_energy; //attenuation - uint color_specular; //rgb color, a specular (8 bit unorm) - uint cone_attenuation_angle; // attenuation and angle, (16bit float) - uint shadow_color_enabled; //shadow rgb color, a>0.5 enabled (8bit unorm) + + vec3 color; + float attenuation; + + float cone_attenuation; + float cone_angle; + float specular_amount; + bool shadow_enabled; + vec4 atlas_rect; // rect in the shadow atlas mat4 shadow_matrix; float shadow_bias; @@ -34,9 +40,13 @@ struct ReflectionData { float index; vec3 box_offset; uint mask; - vec4 params; // intensity, 0, interior , boxproject vec3 ambient; // ambient color + float intensity; + bool exterior; + bool box_project; uint ambient_mode; + uint pad; + //0-8 is intensity,8-9 is ambient, mode mat4 local_matrix; // up to here for spot and omni, rest is for directional // notes: for ambientblend, use distance to edge to blend between already existing global environment }; diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl new file mode 100644 index 0000000000..70a875192c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl @@ -0,0 +1,115 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32 + vec3(0.14, 0.17, 0.23), + vec3(0.24, 0.44, 0.83), + vec3(0.23, 0.57, 0.84), + vec3(0.22, 0.71, 0.84), + vec3(0.22, 0.85, 0.83), + vec3(0.21, 0.85, 0.72), + vec3(0.21, 0.85, 0.57), + vec3(0.20, 0.85, 0.42), + vec3(0.20, 0.85, 0.27), + vec3(0.27, 0.86, 0.19), + vec3(0.51, 0.85, 0.19), + vec3(0.57, 0.86, 0.19), + vec3(0.62, 0.85, 0.19), + vec3(0.67, 0.86, 0.20), + vec3(0.73, 0.85, 0.20), + vec3(0.78, 0.85, 0.20), + vec3(0.83, 0.85, 0.20), + vec3(0.85, 0.82, 0.20), + vec3(0.85, 0.76, 0.20), + vec3(0.85, 0.81, 0.20), + vec3(0.85, 0.65, 0.20), + vec3(0.84, 0.60, 0.21), + vec3(0.84, 0.56, 0.21), + vec3(0.84, 0.51, 0.21), + vec3(0.84, 0.46, 0.21), + vec3(0.84, 0.41, 0.21), + vec3(0.84, 0.36, 0.21), + vec3(0.84, 0.31, 0.21), + vec3(0.84, 0.27, 0.21), + vec3(0.83, 0.22, 0.22), + vec3(0.83, 0.22, 0.27), + vec3(0.83, 0.22, 0.32), + vec3(1.00, 0.63, 0.70)); +layout(push_constant, binding = 0, std430) uniform Params { + uvec2 screen_size; + uvec2 cluster_screen_size; + + uint cluster_shift; + uint cluster_type; + float z_near; + float z_far; + + bool orthogonal; + uint max_cluster_element_count_div_32; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterData { + uint data[]; +} +cluster_data; + +layout(rgba16f, set = 0, binding = 2) uniform restrict writeonly image2D screen_buffer; +layout(set = 0, binding = 3) uniform texture2D depth_buffer; +layout(set = 0, binding = 4) uniform sampler depth_buffer_sampler; + +void main() { + uvec2 screen_pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(screen_pos, params.screen_size))) { + return; + } + + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + + uint offset = cluster_pos.y * params.cluster_screen_size.x + cluster_pos.x; + offset += params.cluster_screen_size.x * params.cluster_screen_size.y * params.cluster_type; + offset *= (params.max_cluster_element_count_div_32 + 32); + + //depth buffers generally can't be accessed via image API + float depth = texelFetch(sampler2D(depth_buffer, depth_buffer_sampler), ivec2(screen_pos), 0).r * 2.0 - 1.0; + + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + depth /= params.z_far; + + uint slice = uint(clamp(floor(depth * 32.0), 0.0, 31.0)); + uint slice_minmax = cluster_data.data[offset + params.max_cluster_element_count_div_32 + slice]; + uint item_min = slice_minmax & 0xFFFF; + uint item_max = slice_minmax >> 16; + + uint item_count = 0; + for (uint i = 0; i < params.max_cluster_element_count_div_32; i++) { + uint slice_bits = cluster_data.data[offset + i]; + while (slice_bits != 0) { + uint bit = findLSB(slice_bits); + uint item = i * 32 + bit; + if ((item >= item_min && item < item_max)) { + item_count++; + } + slice_bits &= ~(1 << bit); + } + } + + item_count = min(item_count, 32); + + vec3 color = usage_gradient[item_count]; + + color = mix(color * 1.2, color * 0.3, float(slice) / 31.0); + + imageStore(screen_buffer, ivec2(screen_pos), vec4(color, 1.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl new file mode 100644 index 0000000000..8723ea78e4 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -0,0 +1,168 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec3 vertex_attrib; + +layout(location = 0) out float depth_interp; +layout(location = 1) out flat uint element_index; + +layout(push_constant, binding = 0, std430) uniform Params { + uint base_index; + uint pad0; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + element_index = params.base_index + gl_InstanceIndex; + + vec3 vertex = vertex_attrib; + vertex *= render_elements.data[element_index].scale; + + vertex = vec4(vertex, 1.0) * render_elements.data[element_index].transform_inv; + depth_interp = -vertex.z; + + gl_Position = state.projection * vec4(vertex, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_vote) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable +#extension GL_KHR_shader_subgroup_vote : enable + +#define USE_SUBGROUPS +#endif + +layout(location = 0) in float depth_interp; +layout(location = 1) in flat uint element_index; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +//cluster data is layout linearly, each cell contains the follow information: +// - list of bits for every element to mark as used, so (max_elem_count/32)*4 uints +// - a uint for each element to mark the depth bits used when rendering (0-31) + +layout(set = 0, binding = 3, std430) buffer restrict ClusterRender { + uint data[]; +} +cluster_render; + +void main() { + //convert from screen to cluster + uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift; + + //get linear cluster offset from screen poss + uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y; + //multiply by data size to position at the beginning of the element list for this cluster + cluster_offset *= state.cluster_data_size; + + //find the current element in the list and plot the bit to mark it as used + uint usage_write_offset = cluster_offset + (element_index >> 5); + uint usage_write_bit = 1 << (element_index & 0x1F); + +#ifdef USE_SUBGROUPS + + uint cluster_thread_group_index; + + if (!gl_HelperInvocation) { + //http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf + + uvec4 mask; + + while (true) { + // find the cluster offset of the first active thread + // threads that did break; go inactive and no longer count + uint first = subgroupBroadcastFirst(cluster_offset); + // update the mask for thread that match this cluster + mask = subgroupBallot(first == cluster_offset); + if (first == cluster_offset) { + // This thread belongs to the group of threads that match this offset, + // so exit the loop. + break; + } + } + + cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask); + + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } +#endif + //find the current element in the depth usage list and mark the current depth as used + float unit_depth = depth_interp * state.inv_z_far; + + uint z_bit = clamp(uint(floor(unit_depth * 32.0)), 0, 31); + + uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index; + uint z_write_bit = 1 << z_bit; + +#ifdef USE_SUBGROUPS + if (!gl_HelperInvocation) { + z_write_bit = subgroupOr(z_write_bit); //merge all Zs + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl new file mode 100644 index 0000000000..5be0893c4f --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl @@ -0,0 +1,119 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 0, std430) uniform Params { + uint cluster_render_data_size; // how much data for a single cluster takes + uint max_render_element_count_div_32; //divided by 32 + uvec2 cluster_screen_size; + uint render_element_count_div_32; //divided by 32 + + uint max_cluster_element_count_div_32; //divided by 32 + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender { + uint data[]; +} +cluster_render; + +layout(set = 0, binding = 2, std430) buffer restrict ClusterStore { + uint data[]; +} +cluster_store; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(pos, params.cluster_screen_size))) { + return; + } + + //counter for each type of render_element + + //base offset for this cluster + uint base_offset = (pos.x + params.cluster_screen_size.x * pos.y); + uint src_offset = base_offset * params.cluster_render_data_size; + + uint render_element_offset = 0; + + //check all render_elements and see which one was written to + while (render_element_offset < params.render_element_count_div_32) { + uint bits = cluster_render.data[src_offset + render_element_offset]; + while (bits != 0) { + //if bits exist, check the render_element + uint index_bit = findLSB(bits); + uint index = render_element_offset * 32 + index_bit; + uint type = render_elements.data[index].type; + + uint z_range_offset = src_offset + params.max_render_element_count_div_32 + index; + uint z_range = cluster_render.data[z_range_offset]; + + //if object was written, z was written, but check just in case + if (z_range != 0) { //should always be > 0 + + uint from_z = findLSB(z_range); + uint to_z = findMSB(z_range) + 1; + + if (render_elements.data[index].touches_near) { + from_z = 0; + } + + if (render_elements.data[index].touches_far) { + to_z = 32; + } + + // find cluster offset in the buffer used for indexing in the renderer + uint dst_offset = (base_offset + type * (params.cluster_screen_size.x * params.cluster_screen_size.y)) * (params.max_cluster_element_count_div_32 + 32); + + uint orig_index = render_elements.data[index].original_index; + //store this index in the Z slices by setting the relevant bit + for (uint i = from_z; i < to_z; i++) { + uint slice_ofs = dst_offset + params.max_cluster_element_count_div_32 + i; + + uint minmax = cluster_store.data[slice_ofs]; + + if (minmax == 0) { + minmax = 0xFFFF; //min 0, max 0xFFFF + } + + uint elem_min = min(orig_index, minmax & 0xFFFF); + uint elem_max = max(orig_index + 1, minmax >> 16); //always store plus one, so zero means range is empty when not written to + + minmax = elem_min | (elem_max << 16); + cluster_store.data[slice_ofs] = minmax; + } + + uint store_word = orig_index >> 5; + uint store_bit = orig_index & 0x1F; + + //store the actual render_element index at the end, so the rendering code can reference it + cluster_store.data[dst_offset + store_word] |= 1 << store_bit; + } + + bits &= ~(1 << index_bit); //clear the bit to continue iterating + } + + render_element_offset++; + } +} diff --git a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl index 54d67db6c6..c3ac0bee57 100644 --- a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl +++ b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl @@ -1,33 +1,48 @@ -#[compute] +#[vertex] #version 450 VERSION_DEFINES -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +layout(push_constant, binding = 1, std430) uniform Params { + float z_far; + float z_near; + bool z_flip; + uint pad; + vec4 screen_rect; +} +params; + +layout(location = 0) out vec2 uv_interp; + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + uv_interp = base_arr[gl_VertexIndex]; + vec2 screen_pos = uv_interp * params.screen_rect.zw + params.screen_rect.xy; + gl_Position = vec4(screen_pos * 2.0 - 1.0, 0.0, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec2 uv_interp; layout(set = 0, binding = 0) uniform samplerCube source_cube; layout(push_constant, binding = 1, std430) uniform Params { - ivec2 screen_size; - ivec2 offset; - float bias; float z_far; float z_near; bool z_flip; + uint pad; + vec4 screen_rect; } params; -layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D depth_buffer; - void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThan(pos, params.screen_size))) { //too large, do nothing - return; - } - - vec2 pixel_size = 1.0 / vec2(params.screen_size); - vec2 uv = (vec2(pos) + 0.5) * pixel_size; + vec2 uv = uv_interp; vec3 normal = vec3(uv * 2.0 - 1.0, 0.0); @@ -65,5 +80,5 @@ void main() { float linear_depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); depth = (linear_depth * depth_fix) / params.z_far; - imageStore(depth_buffer, pos + params.offset, vec4(depth)); + gl_FragDepth = depth; } diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl index 8011dadc72..92a5682572 100644 --- a/servers/rendering/renderer_rd/shaders/gi.glsl +++ b/servers/rendering/renderer_rd/shaders/gi.glsl @@ -97,13 +97,12 @@ layout(push_constant, binding = 0, std430) uniform Params { vec4 proj_info; + vec3 ao_color; uint max_giprobes; + bool high_quality_vct; - bool use_sdfgi; bool orthogonal; - - vec3 ao_color; - uint pad; + uint pad[2]; mat3x4 cam_rotation; } @@ -331,7 +330,7 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o } ambient_light.rgb = diffuse; -#if 1 + if (roughness < 0.2) { vec3 pos_to_uvw = 1.0 / sdfgi.grid_size; vec4 light_accum = vec4(0.0); @@ -363,59 +362,63 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o //ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; } - float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade - while (length(ray_pos) < max_distance) { - for (uint i = 0; i < sdfgi.max_cascades; i++) { - if (i >= cascade && length(ray_pos) < radius_sizes[i]) { - cascade = max(i, cascade); //never go down - - vec3 pos = ray_pos - sdfgi.cascades[i].position; - pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; - - float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), pos).r * 255.0 - 1.1; - - vec4 hit_light = vec4(0.0); - if (distance < softness) { - hit_light.rgb = texture(sampler3D(light_cascades[i], linear_sampler), pos).rgb; - hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy - hit_light.a = clamp(1.0 - (distance / softness), 0.0, 1.0); - hit_light.rgb *= hit_light.a; - } + uint i = 0; + bool found = false; + while (true) { + if (length(ray_pos) >= max_distance || light_accum.a > 0.99) { + break; + } + if (!found && i >= cascade && length(ray_pos) < radius_sizes[i]) { + uint next_i = min(i + 1, sdfgi.max_cascades - 1); + cascade = max(i, cascade); //never go down - distance /= sdfgi.cascades[i].to_cell; + vec3 pos = ray_pos - sdfgi.cascades[i].position; + pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; - if (i < (sdfgi.max_cascades - 1)) { - pos = ray_pos - sdfgi.cascades[i + 1].position; - pos *= sdfgi.cascades[i + 1].to_cell * pos_to_uvw; + float fdistance = textureLod(sampler3D(sdf_cascades[i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; - float distance2 = texture(sampler3D(sdf_cascades[i + 1], linear_sampler), pos).r * 255.0 - 1.1; + vec4 hit_light = vec4(0.0); + if (fdistance < softness) { + hit_light.rgb = textureLod(sampler3D(light_cascades[i], linear_sampler), pos, 0.0).rgb; + hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light.a = clamp(1.0 - (fdistance / softness), 0.0, 1.0); + hit_light.rgb *= hit_light.a; + } - vec4 hit_light2 = vec4(0.0); - if (distance2 < softness) { - hit_light2.rgb = texture(sampler3D(light_cascades[i + 1], linear_sampler), pos).rgb; - hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy - hit_light2.a = clamp(1.0 - (distance2 / softness), 0.0, 1.0); - hit_light2.rgb *= hit_light2.a; - } + fdistance /= sdfgi.cascades[i].to_cell; - float prev_radius = i == 0 ? 0.0 : radius_sizes[i - 1]; - float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); + if (i < (sdfgi.max_cascades - 1)) { + pos = ray_pos - sdfgi.cascades[next_i].position; + pos *= sdfgi.cascades[next_i].to_cell * pos_to_uvw; - distance2 /= sdfgi.cascades[i + 1].to_cell; + float fdistance2 = textureLod(sampler3D(sdf_cascades[next_i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; - hit_light = mix(hit_light, hit_light2, blend); - distance = mix(distance, distance2, blend); + vec4 hit_light2 = vec4(0.0); + if (fdistance2 < softness) { + hit_light2.rgb = textureLod(sampler3D(light_cascades[next_i], linear_sampler), pos, 0.0).rgb; + hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light2.a = clamp(1.0 - (fdistance2 / softness), 0.0, 1.0); + hit_light2.rgb *= hit_light2.a; } - light_accum += hit_light; - ray_pos += ray_dir * distance; - break; + float prev_radius = i == 0 ? 0.0 : radius_sizes[max(0, i - 1)]; + float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); + + fdistance2 /= sdfgi.cascades[next_i].to_cell; + + hit_light = mix(hit_light, hit_light2, blend); + fdistance = mix(fdistance, fdistance2, blend); } - } - if (light_accum.a > 0.99) { - break; + light_accum += hit_light; + ray_pos += ray_dir * fdistance; + found = true; + } + i++; + if (i == sdfgi.max_cascades) { + i = 0; + found = false; } } @@ -434,8 +437,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o } } -#endif - reflection_light.rgb = specular; ambient_light.rgb *= sdfgi.energy; @@ -597,35 +598,24 @@ vec4 fetch_normal_and_roughness(ivec2 pos) { return normal_roughness; } -void main() { - // Pixel being shaded - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing - return; - } - - vec3 vertex = reconstruct_position(pos); - vertex.y = -vertex.y; - +void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 reflection_light) { vec4 normal_roughness = fetch_normal_and_roughness(pos); - vec3 normal = normal_roughness.xyz; - vec4 ambient_light = vec4(0.0), reflection_light = vec4(0.0); + vec3 normal = normal_roughness.xyz; if (normal.length() > 0.5) { //valid normal, can do GI float roughness = normal_roughness.w; - vertex = mat3(params.cam_rotation) * vertex; normal = normalize(mat3(params.cam_rotation) * normal); - vec3 reflection = normalize(reflect(normalize(vertex), normal)); - if (params.use_sdfgi) { - sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); - } +#ifdef USE_SDFGI + sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); +#endif - if (params.max_giprobes > 0) { +#ifdef USE_GIPROBES + { uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg; roughness *= roughness; //find arbitrary tangent and bitangent, then build a matrix @@ -648,16 +638,40 @@ void main() { spec_accum /= blend_accum; } - if (params.use_sdfgi) { - reflection_light = blend_color(spec_accum, reflection_light); - ambient_light = blend_color(amb_accum, ambient_light); - } else { - reflection_light = spec_accum; - ambient_light = amb_accum; - } +#ifdef USE_SDFGI + reflection_light = blend_color(spec_accum, reflection_light); + ambient_light = blend_color(amb_accum, ambient_light); +#else + reflection_light = spec_accum; + ambient_light = amb_accum; +#endif } +#endif + } +} + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + +#ifdef MODE_HALF_RES + pos <<= 1; +#endif + if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing + return; } + vec4 ambient_light = vec4(0.0); + vec4 reflection_light = vec4(0.0); + + vec3 vertex = reconstruct_position(pos); + vertex.y = -vertex.y; + + process_gi(pos, vertex, ambient_light, reflection_light); + +#ifdef MODE_HALF_RES + pos >>= 1; +#endif + imageStore(ambient_buffer, pos, ambient_light); imageStore(reflection_buffer, pos, reflection_light); } diff --git a/servers/rendering/renderer_rd/shaders/giprobe.glsl b/servers/rendering/renderer_rd/shaders/giprobe.glsl index ea4237a45e..b931461b31 100644 --- a/servers/rendering/renderer_rd/shaders/giprobe.glsl +++ b/servers/rendering/renderer_rd/shaders/giprobe.glsl @@ -51,10 +51,10 @@ struct Light { float attenuation; vec3 color; - float spot_angle_radians; + float cos_spot_angle; vec3 position; - float spot_attenuation; + float inv_spot_attenuation; vec3 direction; bool has_shadow; @@ -208,6 +208,15 @@ float raymarch(float distance, float distance_adv, vec3 from, vec3 direction) { return occlusion; //max(0.0,distance); } +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 light_pos) { if (lights.data[light].type == LIGHT_TYPE_DIRECTIONAL) { light_pos = pos - lights.data[light].direction * length(vec3(params.limits)); @@ -220,17 +229,19 @@ bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 return false; } - attenuation = pow(clamp(1.0 - distance / lights.data[light].radius, 0.0001, 1.0), lights.data[light].attenuation); + attenuation = get_omni_attenuation(distance, 1.0 / lights.data[light].radius, lights.data[light].attenuation); if (lights.data[light].type == LIGHT_TYPE_SPOT) { vec3 rel = normalize(pos - light_pos); - float angle = acos(dot(rel, lights.data[light].direction)); - if (angle > lights.data[light].spot_angle_radians) { + float cos_spot_angle = lights.data[light].cos_spot_angle; + float cos_angle = dot(rel, lights.data[light].direction); + if (cos_angle < cos_spot_angle) { return false; } - float d = clamp(angle / lights.data[light].spot_angle_radians, 0, 1); - attenuation *= pow(1.0 - d, lights.data[light].spot_attenuation); + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); } } diff --git a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl index 9c794f1bcc..56b3b7ccb4 100644 --- a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl +++ b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl @@ -43,10 +43,10 @@ struct Light { float attenuation; vec3 color; - float spot_angle_radians; + float cos_spot_angle; vec3 position; - float spot_attenuation; + float inv_spot_attenuation; vec3 direction; bool has_shadow; @@ -146,13 +146,15 @@ bool compute_light_vector(uint light, uint cell, vec3 pos, out float attenuation if (lights.data[light].type == LIGHT_TYPE_SPOT) { vec3 rel = normalize(pos - light_pos); - float angle = acos(dot(rel, lights.data[light].direction)); - if (angle > lights.data[light].spot_angle_radians) { + float cos_spot_angle = lights.data[light].cos_spot_angle; + float cos_angle = dot(rel, lights.data[light].direction); + if (cos_angle < cos_spot_angle) { return false; } - float d = clamp(angle / lights.data[light].spot_angle_radians, 0, 1); - attenuation *= pow(1.0 - d, lights.data[light].spot_attenuation); + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); } } diff --git a/servers/rendering/renderer_rd/shaders/particles.glsl b/servers/rendering/renderer_rd/shaders/particles.glsl index 926c7ef9fc..cb6d8dc7f6 100644 --- a/servers/rendering/renderer_rd/shaders/particles.glsl +++ b/servers/rendering/renderer_rd/shaders/particles.glsl @@ -173,7 +173,7 @@ uint hash(uint x) { return x; } -bool emit_particle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) { +bool emit_subparticle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) { if (!params.can_emit) { return false; } diff --git a/servers/rendering/renderer_rd/shaders/resolve.glsl b/servers/rendering/renderer_rd/shaders/resolve.glsl index 9429a66dc9..e83c4ca93b 100644 --- a/servers/rendering/renderer_rd/shaders/resolve.glsl +++ b/servers/rendering/renderer_rd/shaders/resolve.glsl @@ -58,6 +58,116 @@ void main() { #else +#if 1 + + vec4 group1; + vec4 group2; + vec4 group3; + vec4 group4; + int best_index = 0; + + //2X + group1.x = texelFetch(source_depth, pos, 0).r; + group1.y = texelFetch(source_depth, pos, 1).r; + + //4X + if (params.sample_count >= 4) { + group1.z = texelFetch(source_depth, pos, 2).r; + group1.w = texelFetch(source_depth, pos, 3).r; + } + //8X + if (params.sample_count >= 8) { + group2.x = texelFetch(source_depth, pos, 4).r; + group2.y = texelFetch(source_depth, pos, 5).r; + group2.z = texelFetch(source_depth, pos, 6).r; + group2.w = texelFetch(source_depth, pos, 7).r; + } + //16X + if (params.sample_count >= 16) { + group3.x = texelFetch(source_depth, pos, 8).r; + group3.y = texelFetch(source_depth, pos, 9).r; + group3.z = texelFetch(source_depth, pos, 10).r; + group3.w = texelFetch(source_depth, pos, 11).r; + + group4.x = texelFetch(source_depth, pos, 12).r; + group4.y = texelFetch(source_depth, pos, 13).r; + group4.z = texelFetch(source_depth, pos, 14).r; + group4.w = texelFetch(source_depth, pos, 15).r; + } + + if (params.sample_count == 2) { + best_index = (pos.x & 1) ^ ((pos.y >> 1) & 1); //not much can be done here + } else if (params.sample_count == 4) { + vec4 freq = vec4(equal(group1, vec4(group1.x))); + freq += vec4(equal(group1, vec4(group1.y))); + freq += vec4(equal(group1, vec4(group1.z))); + freq += vec4(equal(group1, vec4(group1.w))); + + float min_f = freq.x; + best_index = 0; + if (freq.y < min_f) { + best_index = 1; + min_f = freq.y; + } + if (freq.z < min_f) { + best_index = 2; + min_f = freq.z; + } + if (freq.w < min_f) { + best_index = 3; + } + } else if (params.sample_count == 8) { + vec4 freq0 = vec4(equal(group1, vec4(group1.x))); + vec4 freq1 = vec4(equal(group2, vec4(group1.x))); + freq0 += vec4(equal(group1, vec4(group1.y))); + freq1 += vec4(equal(group2, vec4(group1.y))); + freq0 += vec4(equal(group1, vec4(group1.z))); + freq1 += vec4(equal(group2, vec4(group1.z))); + freq0 += vec4(equal(group1, vec4(group1.w))); + freq1 += vec4(equal(group2, vec4(group1.w))); + freq0 += vec4(equal(group1, vec4(group2.x))); + freq1 += vec4(equal(group2, vec4(group2.x))); + freq0 += vec4(equal(group1, vec4(group2.y))); + freq1 += vec4(equal(group2, vec4(group2.y))); + freq0 += vec4(equal(group1, vec4(group2.z))); + freq1 += vec4(equal(group2, vec4(group2.z))); + freq0 += vec4(equal(group1, vec4(group2.w))); + freq1 += vec4(equal(group2, vec4(group2.w))); + + float min_f0 = freq0.x; + int best_index0 = 0; + if (freq0.y < min_f0) { + best_index0 = 1; + min_f0 = freq0.y; + } + if (freq0.z < min_f0) { + best_index0 = 2; + min_f0 = freq0.z; + } + if (freq0.w < min_f0) { + best_index0 = 3; + min_f0 = freq0.w; + } + + float min_f1 = freq1.x; + int best_index1 = 4; + if (freq1.y < min_f1) { + best_index1 = 5; + min_f1 = freq1.y; + } + if (freq1.z < min_f1) { + best_index1 = 6; + min_f1 = freq1.z; + } + if (freq1.w < min_f1) { + best_index1 = 7; + min_f1 = freq1.w; + } + + best_index = mix(best_index0, best_index1, min_f0 < min_f1); + } + +#else float depths[16]; int depth_indices[16]; int depth_amount[16]; @@ -91,7 +201,7 @@ void main() { depth_least = depth_amount[j]; } } - +#endif best_depth = texelFetch(source_depth, pos, best_index).r; best_normal_roughness = texelFetch(source_normal_roughness, pos, best_index); #ifdef GIPROBE_RESOLVE diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index 05f7637478..adccf1e712 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -89,33 +89,45 @@ MATERIAL_UNIFORMS } material; #endif -/* clang-format off */ - -VERTEX_SHADER_GLOBALS - -/* clang-format on */ - invariant gl_Position; -layout(location = 7) flat out uint instance_index; - #ifdef MODE_DUAL_PARABOLOID layout(location = 8) out float dp_clip; #endif +layout(location = 9) out flat uint instance_index; + +/* clang-format off */ + +VERTEX_SHADER_GLOBALS + +/* clang-format on */ + void main() { - instance_index = draw_call.instance_index; vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif + instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + mat4 world_matrix = instances.data[instance_index].transform; - mat3 world_normal_matrix = mat3(instances.data[instance_index].normal_transform); - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH)) { + mat3 world_normal_matrix; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { + world_normal_matrix = inverse(mat3(world_matrix)); + } else { + world_normal_matrix = mat3(world_matrix); + } + + if (is_multimesh) { //multimesh, instances are for it uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; offset *= gl_InstanceIndex; @@ -144,10 +156,6 @@ void main() { matrix = transpose(matrix); world_matrix = world_matrix * matrix; world_normal_matrix = world_normal_matrix * mat3(matrix); - - } else { - //not a multimesh, instances are for multiple draw calls - instance_index += gl_InstanceIndex; } vec3 vertex = vertex_attrib; @@ -195,7 +203,7 @@ void main() { uv2_interp = uv2_attrib; #endif -#ifdef USE_OVERRIDE_POSITION +#ifdef OVERRIDE_POSITION vec4 position; #endif @@ -290,7 +298,7 @@ VERTEX_SHADER_CODE #endif //MODE_RENDER_DEPTH -#ifdef USE_OVERRIDE_POSITION +#ifdef OVERRIDE_POSITION gl_Position = position; #else gl_Position = projection_matrix * vec4(vertex_interp, 1.0); @@ -305,7 +313,8 @@ VERTEX_SHADER_CODE #endif #ifdef MODE_RENDER_MATERIAL if (scene_data.material_uv2_mode) { - gl_Position.xy = (uv2_attrib.xy + draw_call.bake_uv2_offset) * 2.0 - 1.0; + vec2 uv_offset = unpackHalf2x16(draw_call.uv_offset); + gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0; gl_Position.z = 0.00001; gl_Position.w = 1.0; } @@ -345,18 +354,17 @@ layout(location = 5) in vec3 tangent_interp; layout(location = 6) in vec3 binormal_interp; #endif -layout(location = 7) flat in uint instance_index; - #ifdef MODE_DUAL_PARABOLOID layout(location = 8) in float dp_clip; #endif +layout(location = 9) in flat uint instance_index; + //defines to keep compatibility with vertex #define world_matrix instances.data[instance_index].transform -#define world_normal_matrix instances.data[instance_index].normal_transform #define projection_matrix scene_data.projection_matrix #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) @@ -545,7 +553,7 @@ vec3 F0(float metallic, float specular, vec3 albedo) { return mix(vec3(dielectric), albedo, vec3(metallic)); } -void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float attenuation, vec3 shadow_attenuation, vec3 diffuse_color, float roughness, float metallic, float specular, float specular_blob_intensity, +void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount, #ifdef LIGHT_BACKLIGHT_USED vec3 backlight, #endif @@ -557,7 +565,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte float transmittance_z, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, + float rim, float rim_tint, vec3 rim_color, #endif #ifdef LIGHT_CLEARCOAT_USED float clearcoat, float clearcoat_gloss, @@ -565,6 +573,9 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte #ifdef LIGHT_ANISOTROPY_USED vec3 B, vec3 T, float anisotropy, #endif +#ifdef USE_SOFT_SHADOWS + float A, +#endif #ifdef USE_SHADOW_TO_OPACITY inout float alpha, #endif @@ -574,7 +585,6 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte // light is written by the light shader vec3 normal = N; - vec3 albedo = diffuse_color; vec3 light = L; vec3 view = V; @@ -585,7 +595,12 @@ LIGHT_SHADER_CODE /* clang-format on */ #else + +#ifdef USE_SOFT_SHADOWS float NdotL = min(A + dot(N, L), 1.0); +#else + float NdotL = dot(N, L); +#endif float cNdotL = max(NdotL, 0.0); // clamped NdotL float NdotV = dot(N, V); float cNdotV = max(NdotV, 0.0); @@ -595,14 +610,25 @@ LIGHT_SHADER_CODE #endif #if defined(SPECULAR_BLINN) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#ifdef USE_SOFT_SHADOWS float cNdotH = clamp(A + dot(N, H), 0.0, 1.0); +#else + float cNdotH = clamp(dot(N, H), 0.0, 1.0); +#endif #endif #if defined(DIFFUSE_BURLEY) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#ifdef USE_SOFT_SHADOWS float cLdotH = clamp(A + dot(L, H), 0.0, 1.0); +#else + float cLdotH = clamp(dot(L, H), 0.0, 1.0); +#endif #endif + float metallic = unpackUnorm4x8(orms).z; if (metallic < 1.0) { + float roughness = unpackUnorm4x8(orms).y; + #if defined(DIFFUSE_OREN_NAYAR) vec3 diffuse_brdf_NL; #else @@ -612,23 +638,6 @@ LIGHT_SHADER_CODE #if defined(DIFFUSE_LAMBERT_WRAP) // energy conserving lambert wrap shader diffuse_brdf_NL = max(0.0, (NdotL + roughness) / ((1.0 + roughness) * (1.0 + roughness))); - -#elif defined(DIFFUSE_OREN_NAYAR) - - { - // see http://mimosa-pudica.net/improved-oren-nayar.html - float LdotV = dot(L, V); - - float s = LdotV - NdotL * NdotV; - float t = mix(1.0, max(NdotL, NdotV), step(0.0, s)); - - float sigma2 = roughness * roughness; // TODO: this needs checking - vec3 A = 1.0 + sigma2 * (-0.5 / (sigma2 + 0.33) + 0.17 * diffuse_color / (sigma2 + 0.13)); - float B = 0.45 * sigma2 / (sigma2 + 0.09); - - diffuse_brdf_NL = cNdotL * (A + vec3(B) * s / t) * (1.0 / M_PI); - } - #elif defined(DIFFUSE_TOON) diffuse_brdf_NL = smoothstep(-roughness, max(roughness, 0.01), NdotL); @@ -656,15 +665,15 @@ LIGHT_SHADER_CODE diffuse_brdf_NL = cNdotL * (1.0 / M_PI); #endif - diffuse_light += light_color * diffuse_color * shadow_attenuation * diffuse_brdf_NL * attenuation; + diffuse_light += light_color * diffuse_brdf_NL * attenuation; #if defined(LIGHT_BACKLIGHT_USED) - diffuse_light += light_color * diffuse_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation; + diffuse_light += light_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation; #endif #if defined(LIGHT_RIM_USED) float rim_light = pow(max(0.0, 1.0 - cNdotV), max(0.0, (1.0 - roughness) * 16.0)); - diffuse_light += rim_light * rim * mix(vec3(1.0), diffuse_color, rim_tint) * light_color; + diffuse_light += rim_light * rim * mix(vec3(1.0), rim_color, rim_tint) * light_color; #endif #ifdef LIGHT_TRANSMITTANCE_USED @@ -682,7 +691,7 @@ LIGHT_SHADER_CODE vec3(0.358, 0.004, 0.0) * exp(dd / 1.99) + vec3(0.078, 0.0, 0.0) * exp(dd / 7.41); - diffuse_light += profile * transmittance_color.a * diffuse_color * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI) * attenuation; + diffuse_light += profile * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI); } #else @@ -692,7 +701,7 @@ LIGHT_SHADER_CODE fade = pow(max(0.0, 1.0 - fade), transmittance_curve); fade *= clamp(transmittance_boost - NdotL, 0.0, 1.0); - diffuse_light += diffuse_color * transmittance_color.rgb * light_color * (1.0 / M_PI) * transmittance_color.a * fade * attenuation; + diffuse_light += transmittance_color.rgb * light_color * (1.0 / M_PI) * transmittance_color.a * fade; } #endif //SSS_MODE_SKIN @@ -700,6 +709,7 @@ LIGHT_SHADER_CODE #endif //LIGHT_TRANSMITTANCE_USED } + float roughness = unpackUnorm4x8(orms).y; if (roughness > 0.0) { // FIXME: roughness == 0 should not disable specular light entirely // D @@ -712,7 +722,7 @@ LIGHT_SHADER_CODE blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); float intensity = blinn; - specular_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation; + specular_light += light_color * intensity * attenuation * specular_amount; #elif defined(SPECULAR_PHONG) @@ -723,7 +733,7 @@ LIGHT_SHADER_CODE phong *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); float intensity = (phong) / max(4.0 * cNdotV * cNdotL, 0.75); - specular_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation; + specular_light += light_color * intensity * attenuation * specular_amount; #elif defined(SPECULAR_TOON) @@ -732,7 +742,7 @@ LIGHT_SHADER_CODE float mid = 1.0 - roughness; mid *= mid; float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid; - diffuse_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation; // write to diffuse_light, as in toon shading you generally want no reflection + diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection #elif defined(SPECULAR_DISABLED) // none.. @@ -757,13 +767,12 @@ LIGHT_SHADER_CODE float G = G_GGX_2cos(cNdotL, alpha_ggx) * G_GGX_2cos(cNdotV, alpha_ggx); #endif // F - vec3 f0 = F0(metallic, specular, diffuse_color); float cLdotH5 = SchlickFresnel(cLdotH); vec3 F = mix(vec3(cLdotH5), vec3(1.0), f0); vec3 specular_brdf_NL = cNdotL * D * F * G; - specular_light += specular_brdf_NL * light_color * shadow_attenuation * specular_blob_intensity * attenuation; + specular_light += specular_brdf_NL * light_color * attenuation * specular_amount; #endif #if defined(LIGHT_CLEARCOAT_USED) @@ -777,12 +786,12 @@ LIGHT_SHADER_CODE float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL; - specular_light += clearcoat_specular_brdf_NL * light_color * shadow_attenuation * specular_blob_intensity * attenuation; + specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount; #endif } #ifdef USE_SHADOW_TO_OPACITY - alpha = min(alpha, clamp(1.0 - length(shadow_attenuation * attenuation), 0.0, 1.0)); + alpha = min(alpha, clamp(1.0 - attenuation), 0.0, 1.0)); #endif #endif //defined(USE_LIGHT_SHADER_CODE) @@ -895,69 +904,39 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex #endif //USE_NO_SHADOWS -void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 albedo, float roughness, float metallic, float specular, float p_blob_intensity, -#ifdef LIGHT_BACKLIGHT_USED - vec3 backlight, -#endif -#ifdef LIGHT_TRANSMITTANCE_USED - vec4 transmittance_color, - float transmittance_depth, - float transmittance_curve, - float transmittance_boost, -#endif -#ifdef LIGHT_RIM_USED - float rim, float rim_tint, -#endif -#ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_gloss, -#endif -#ifdef LIGHT_ANISOTROPY_USED - vec3 binormal, vec3 tangent, float anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - inout float alpha, -#endif - inout vec3 diffuse_light, inout vec3 specular_light) { - vec3 light_rel_vec = lights.data[idx].position - vertex; - float light_length = length(light_rel_vec); - float normalized_distance = light_length * lights.data[idx].inv_radius; - vec2 attenuation_energy = unpackHalf2x16(lights.data[idx].attenuation_energy); - float omni_attenuation = pow(max(1.0 - normalized_distance, 0.0), attenuation_energy.x); - float light_attenuation = omni_attenuation; - vec3 shadow_attenuation = vec3(1.0); - vec4 color_specular = unpackUnorm4x8(lights.data[idx].color_specular); - color_specular.rgb *= attenuation_energy.y; - float size_A = 0.0; - - if (lights.data[idx].size > 0.0) { - float t = lights.data[idx].size / max(0.001, light_length); - size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); - } - -#ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; //no transmittance by default -#endif +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} +float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { #ifndef USE_NO_SHADOWS - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[idx].shadow_color_enabled); - if (shadow_color_enabled.w > 0.5) { + if (omni_lights.data[idx].shadow_enabled) { // there is a shadowmap + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + vec4 v = vec4(vertex, 1.0); - vec4 splane = (lights.data[idx].shadow_matrix * v); + vec4 splane = (omni_lights.data[idx].shadow_matrix * v); float shadow_len = length(splane.xyz); //need to remember shadow len from here { - vec3 nofs = normal_interp * lights.data[idx].shadow_normal_bias / lights.data[idx].inv_radius; + vec3 nofs = normal_interp * omni_lights.data[idx].shadow_normal_bias / omni_lights.data[idx].inv_radius; nofs *= (1.0 - max(0.0, dot(normalize(light_rel_vec), normalize(normal_interp)))); v.xyz += nofs; - splane = (lights.data[idx].shadow_matrix * v); + splane = (omni_lights.data[idx].shadow_matrix * v); } float shadow; - if (lights.data[idx].soft_shadow_size > 0.0) { +#ifdef USE_SOFT_SHADOWS + if (omni_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker @@ -977,10 +956,10 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); vec3 tangent = normalize(cross(v0, normal)); vec3 bitangent = normalize(cross(tangent, normal)); - float z_norm = shadow_len * lights.data[idx].inv_radius; + float z_norm = shadow_len * omni_lights.data[idx].inv_radius; - tangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale; - bitangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale; + tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; + bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; @@ -988,7 +967,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); - vec4 uv_rect = lights.data[idx].atlas_rect; + vec4 uv_rect = omni_lights.data[idx].atlas_rect; if (pos.z >= 0.0) { pos.z += 1.0; @@ -1016,7 +995,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v tangent *= penumbra; bitangent *= penumbra; - z_norm -= lights.data[idx].inv_radius * lights.data[idx].shadow_bias; + z_norm -= omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias; shadow = 0.0; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { @@ -1024,7 +1003,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); - vec4 uv_rect = lights.data[idx].atlas_rect; + vec4 uv_rect = omni_lights.data[idx].atlas_rect; if (pos.z >= 0.0) { pos.z += 1.0; @@ -1047,8 +1026,9 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v shadow = 1.0; } } else { +#endif splane.xyz = normalize(splane.xyz); - vec4 clamp_rect = lights.data[idx].atlas_rect; + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; if (splane.z >= 0.0) { splane.z += 1.0; @@ -1062,101 +1042,149 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v splane.xy /= splane.z; splane.xy = splane.xy * 0.5 + 0.5; - splane.z = (shadow_len - lights.data[idx].shadow_bias) * lights.data[idx].inv_radius; + splane.z = (shadow_len - omni_lights.data[idx].shadow_bias) * omni_lights.data[idx].inv_radius; splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; splane.w = 1.0; //needed? i think it should be 1 already - shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane); + shadow = sample_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane); +#ifdef USE_SOFT_SHADOWS } +#endif + + return shadow; + } +#endif + return 1.0; +} + +void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +#ifdef LIGHT_BACKLIGHT_USED + vec3 backlight, +#endif #ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 clamp_rect = lights.data[idx].atlas_rect; + vec4 transmittance_color, + float transmittance_depth, + float transmittance_curve, + float transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + float rim, float rim_tint, vec3 rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + float clearcoat, float clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + vec3 binormal, vec3 tangent, float anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + inout float alpha, +#endif + inout vec3 diffuse_light, inout vec3 specular_light) { + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); + float light_attenuation = omni_attenuation; + vec3 color = omni_lights.data[idx].color; - //redo shadowmapping, but shrink the model a bit to avoid arctifacts - splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0)); +#ifdef USE_SOFT_SHADOWS + float size_A = 0.0; - shadow_len = length(splane.xyz); - splane = normalize(splane.xyz); + if (omni_lights.data[idx].size > 0.0) { + float t = omni_lights.data[idx].size / max(0.001, light_length); + size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + } +#endif - if (splane.z >= 0.0) { - splane.z += 1.0; +#ifdef LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; //no transmittance by default + transmittance_color.a *= light_attenuation; + { + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; - } else { - splane.z = 1.0 - splane.z; - } + //redo shadowmapping, but shrink the model a bit to avoid arctifacts + vec4 splane = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * omni_lights.data[idx].transmittance_bias, 1.0)); - splane.xy /= splane.z; - splane.xy = splane.xy * 0.5 + 0.5; - splane.z = shadow_len * lights.data[idx].inv_radius; - splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - splane.w = 1.0; //needed? i think it should be 1 already + shadow_len = length(splane.xyz); + splane = normalize(splane.xyz); + + if (splane.z >= 0.0) { + splane.z += 1.0; - float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; - transmittance_z = (splane.z - shadow_z) / lights.data[idx].inv_radius; + } else { + splane.z = 1.0 - splane.z; } -#endif - vec3 no_shadow = vec3(1.0); + splane.xy /= splane.z; + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * omni_lights.data[idx].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already - if (lights.data[idx].projector_rect != vec4(0.0)) { - vec3 local_v = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; - local_v = normalize(local_v); + float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; + transmittance_z = (splane.z - shadow_z) / omni_lights.data[idx].inv_radius; + } +#endif - vec4 atlas_rect = lights.data[idx].projector_rect; +#if 0 - if (local_v.z >= 0.0) { - local_v.z += 1.0; - atlas_rect.y += atlas_rect.w; + if (omni_lights.data[idx].projector_rect != vec4(0.0)) { + vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; + local_v = normalize(local_v); - } else { - local_v.z = 1.0 - local_v.z; - } + vec4 atlas_rect = omni_lights.data[idx].projector_rect; - local_v.xy /= local_v.z; - local_v.xy = local_v.xy * 0.5 + 0.5; - vec2 proj_uv = local_v.xy * atlas_rect.zw; + if (local_v.z >= 0.0) { + local_v.z += 1.0; + atlas_rect.y += atlas_rect.w; - vec2 proj_uv_ddx; - vec2 proj_uv_ddy; - { - vec3 local_v_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz; - local_v_ddx = normalize(local_v_ddx); + } else { + local_v.z = 1.0 - local_v.z; + } - if (local_v_ddx.z >= 0.0) { - local_v_ddx.z += 1.0; - } else { - local_v_ddx.z = 1.0 - local_v_ddx.z; - } + local_v.xy /= local_v.z; + local_v.xy = local_v.xy * 0.5 + 0.5; + vec2 proj_uv = local_v.xy * atlas_rect.zw; - local_v_ddx.xy /= local_v_ddx.z; - local_v_ddx.xy = local_v_ddx.xy * 0.5 + 0.5; + vec2 proj_uv_ddx; + vec2 proj_uv_ddy; + { + vec3 local_v_ddx = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz; + local_v_ddx = normalize(local_v_ddx); - proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv; + if (local_v_ddx.z >= 0.0) { + local_v_ddx.z += 1.0; + } else { + local_v_ddx.z = 1.0 - local_v_ddx.z; + } - vec3 local_v_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz; - local_v_ddy = normalize(local_v_ddy); + local_v_ddx.xy /= local_v_ddx.z; + local_v_ddx.xy = local_v_ddx.xy * 0.5 + 0.5; - if (local_v_ddy.z >= 0.0) { - local_v_ddy.z += 1.0; - } else { - local_v_ddy.z = 1.0 - local_v_ddy.z; - } + proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv; - local_v_ddy.xy /= local_v_ddy.z; - local_v_ddy.xy = local_v_ddy.xy * 0.5 + 0.5; + vec3 local_v_ddy = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz; + local_v_ddy = normalize(local_v_ddy); - proj_uv_ddy = local_v_ddy.xy * atlas_rect.zw - proj_uv; + if (local_v_ddy.z >= 0.0) { + local_v_ddy.z += 1.0; + } else { + local_v_ddy.z = 1.0 - local_v_ddy.z; } - vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + atlas_rect.xy, proj_uv_ddx, proj_uv_ddy); - no_shadow = mix(no_shadow, proj.rgb, proj.a); + local_v_ddy.xy /= local_v_ddy.z; + local_v_ddy.xy = local_v_ddy.xy * 0.5 + 0.5; + + proj_uv_ddy = local_v_ddy.xy * atlas_rect.zw - proj_uv; } - shadow_attenuation = mix(shadow_color_enabled.rgb, no_shadow, shadow); + vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + atlas_rect.xy, proj_uv_ddx, proj_uv_ddy); + no_shadow = mix(no_shadow, proj.rgb, proj.a); } -#endif //USE_NO_SHADOWS +#endif + + light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color_specular.rgb, light_attenuation, shadow_attenuation, albedo, roughness, metallic, specular, color_specular.a * p_blob_intensity, + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1168,7 +1196,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim * omni_attenuation, rim_tint, + rim * omni_attenuation, rim_tint, rim_color, #endif #ifdef LIGHT_CLEARCOAT_USED clearcoat, clearcoat_gloss, @@ -1176,6 +1204,9 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v #ifdef LIGHT_ANISOTROPY_USED binormal, tangent, anisotropy, #endif +#ifdef USE_SOFT_SHADOWS + size_A, +#endif #ifdef USE_SHADOW_TO_OPACITY alpha, #endif @@ -1183,89 +1214,39 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v specular_light); } -void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 albedo, float roughness, float metallic, float specular, float p_blob_intensity, -#ifdef LIGHT_BACKLIGHT_USED - vec3 backlight, -#endif -#ifdef LIGHT_TRANSMITTANCE_USED - vec4 transmittance_color, - float transmittance_depth, - float transmittance_curve, - float transmittance_boost, -#endif -#ifdef LIGHT_RIM_USED - float rim, float rim_tint, -#endif -#ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_gloss, -#endif -#ifdef LIGHT_ANISOTROPY_USED - vec3 binormal, vec3 tangent, float anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - inout float alpha, -#endif - inout vec3 diffuse_light, - inout vec3 specular_light) { - vec3 light_rel_vec = lights.data[idx].position - vertex; - float light_length = length(light_rel_vec); - float normalized_distance = light_length * lights.data[idx].inv_radius; - vec2 attenuation_energy = unpackHalf2x16(lights.data[idx].attenuation_energy); - float spot_attenuation = pow(max(1.0 - normalized_distance, 0.001), attenuation_energy.x); - vec3 spot_dir = lights.data[idx].direction; - vec2 spot_att_angle = unpackHalf2x16(lights.data[idx].cone_attenuation_angle); - float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y)); - spot_attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x); - float light_attenuation = spot_attenuation; - vec3 shadow_attenuation = vec3(1.0); - vec4 color_specular = unpackUnorm4x8(lights.data[idx].color_specular); - color_specular.rgb *= attenuation_energy.y; - - float size_A = 0.0; - - if (lights.data[idx].size > 0.0) { - float t = lights.data[idx].size / max(0.001, light_length); - size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); - } -/* - if (lights.data[idx].atlas_rect!=vec4(0.0)) { - //use projector texture - } - */ -#ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; -#endif - +float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { #ifndef USE_NO_SHADOWS - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[idx].shadow_color_enabled); - if (shadow_color_enabled.w > 0.5) { + if (spot_lights.data[idx].shadow_enabled) { + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + vec3 spot_dir = spot_lights.data[idx].direction; //there is a shadowmap vec4 v = vec4(vertex, 1.0); - v.xyz -= spot_dir * lights.data[idx].shadow_bias; + v.xyz -= spot_dir * spot_lights.data[idx].shadow_bias; - float z_norm = dot(spot_dir, -light_rel_vec) * lights.data[idx].inv_radius; + float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius; float depth_bias_scale = 1.0 / (max(0.0001, z_norm)); //the closer to the light origin, the more you have to offset to reach 1px in the map - vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * lights.data[idx].shadow_normal_bias * depth_bias_scale; + vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * spot_lights.data[idx].shadow_normal_bias * depth_bias_scale; normal_bias -= spot_dir * dot(spot_dir, normal_bias); //only XY, no Z v.xyz += normal_bias; //adjust with bias - z_norm = dot(spot_dir, v.xyz - lights.data[idx].position) * lights.data[idx].inv_radius; + z_norm = dot(spot_dir, v.xyz - spot_lights.data[idx].position) * spot_lights.data[idx].inv_radius; float shadow; - vec4 splane = (lights.data[idx].shadow_matrix * v); + vec4 splane = (spot_lights.data[idx].shadow_matrix * v); splane /= splane.w; - if (lights.data[idx].soft_shadow_size > 0.0) { +#ifdef USE_SOFT_SHADOWS + if (spot_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker - vec2 shadow_uv = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy; + vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; float blocker_count = 0.0; float blocker_average = 0.0; @@ -1278,11 +1259,11 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); } - float uv_size = lights.data[idx].soft_shadow_size * z_norm * lights.data[idx].soft_shadow_scale; - vec2 clamp_max = lights.data[idx].atlas_rect.xy + lights.data[idx].atlas_rect.zw; + float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale; + vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; - suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max); + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; if (d < z_norm) { blocker_average += d; @@ -1299,7 +1280,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v shadow = 0.0; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; - suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max); + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, z_norm, 1.0)); } @@ -1311,54 +1292,93 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v } } else { +#endif //hard shadow - vec4 shadow_uv = vec4(splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy, z_norm, 1.0); + vec4 shadow_uv = vec4(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, splane.z, 1.0); - shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); + shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); +#ifdef USE_SOFT_SHADOWS } +#endif - vec3 no_shadow = vec3(1.0); + return shadow; + } - if (lights.data[idx].projector_rect != vec4(0.0)) { - splane = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)); - splane /= splane.w; +#endif //USE_NO_SHADOWS - vec2 proj_uv = splane.xy * lights.data[idx].projector_rect.zw; + return 1.0; +} - //ensure we have proper mipmaps - vec4 splane_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)); - splane_ddx /= splane_ddx.w; - vec2 proj_uv_ddx = splane_ddx.xy * lights.data[idx].projector_rect.zw - proj_uv; +void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +#ifdef LIGHT_BACKLIGHT_USED + vec3 backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + vec4 transmittance_color, + float transmittance_depth, + float transmittance_curve, + float transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + float rim, float rim_tint, vec3 rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + float clearcoat, float clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + vec3 binormal, vec3 tangent, float anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + inout float alpha, +#endif + inout vec3 diffuse_light, + inout vec3 specular_light) { + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); + vec3 spot_dir = spot_lights.data[idx].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[idx].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[idx].cone_angle)); + spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation); + float light_attenuation = spot_attenuation; + vec3 color = spot_lights.data[idx].color; + float specular_amount = spot_lights.data[idx].specular_amount; - vec4 splane_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)); - splane_ddy /= splane_ddy.w; - vec2 proj_uv_ddy = splane_ddy.xy * lights.data[idx].projector_rect.zw - proj_uv; +#ifdef USE_SOFT_SHADOWS + float size_A = 0.0; - vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + lights.data[idx].projector_rect.xy, proj_uv_ddx, proj_uv_ddy); - no_shadow = mix(no_shadow, proj.rgb, proj.a); - } + if (spot_lights.data[idx].size > 0.0) { + float t = spot_lights.data[idx].size / max(0.001, light_length); + size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + } +#endif - shadow_attenuation = mix(shadow_color_enabled.rgb, no_shadow, shadow); + /* + if (spot_lights.data[idx].atlas_rect!=vec4(0.0)) { + //use projector texture + } + */ #ifdef LIGHT_TRANSMITTANCE_USED - { - splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0)); - splane /= splane.w; - splane.xy = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy; - - float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; - //reconstruct depth - shadow_z /= lights.data[idx].inv_radius; - //distance to light plane - float z = dot(spot_dir, -light_rel_vec); - transmittance_z = z - shadow_z; - } -#endif //LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; + transmittance_color.a *= light_attenuation; + { + splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * spot_lights.data[idx].transmittance_bias, 1.0)); + splane /= splane.w; + splane.xy = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; + + float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; + //reconstruct depth + shadow_z /= spot_lights.data[idx].inv_radius; + //distance to light plane + float z = dot(spot_dir, -light_rel_vec); + transmittance_z = z - shadow_z; } +#endif //LIGHT_TRANSMITTANCE_USED -#endif //USE_NO_SHADOWS + light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color_specular.rgb, light_attenuation, shadow_attenuation, albedo, roughness, metallic, specular, color_specular.a * p_blob_intensity, + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1370,7 +1390,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim * spot_attenuation, rim_tint, + rim * spot_attenuation, rim_tint, rim_color, #endif #ifdef LIGHT_CLEARCOAT_USED clearcoat, clearcoat_gloss, @@ -1378,6 +1398,9 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v #ifdef LIGHT_ANISOTROPY_USED binormal, tangent, anisotropy, #endif +#ifdef USE_SOFT_SHADOW + size_A, +#endif #ifdef USE_SHADOW_TO_OPACITY alpha, #endif @@ -1401,11 +1424,11 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes blend *= blend; blend = max(0.0, 1.0 - blend); - if (reflections.data[ref_index].params.x > 0.0) { // compute reflection + if (reflections.data[ref_index].intensity > 0.0) { // compute reflection vec3 local_ref_vec = (reflections.data[ref_index].local_matrix * vec4(ref_vec, 0.0)).xyz; - if (reflections.data[ref_index].params.w > 0.5) { //box project + if (reflections.data[ref_index].box_project) { //box project vec3 nrdir = normalize(local_ref_vec); vec3 rbmax = (box_extents - local_pos) / nrdir; @@ -1422,11 +1445,11 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes reflection.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_ref_vec, reflections.data[ref_index].index), roughness * MAX_ROUGHNESS_LOD).rgb; - if (reflections.data[ref_index].params.z < 0.5) { + if (reflections.data[ref_index].exterior) { reflection.rgb = mix(specular_light, reflection.rgb, blend); } - reflection.rgb *= reflections.data[ref_index].params.x; + reflection.rgb *= reflections.data[ref_index].intensity; //intensity reflection.a = blend; reflection.rgb *= reflection.a; @@ -1445,7 +1468,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes ambient_out.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb; ambient_out.a = blend; - if (reflections.data[ref_index].params.z < 0.5) { //interior + if (reflections.data[ref_index].exterior) { ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); } @@ -1456,7 +1479,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes vec4 ambient_out; ambient_out.a = blend; ambient_out.rgb = reflections.data[ref_index].ambient; - if (reflections.data[ref_index].params.z < 0.5) { + if (reflections.data[ref_index].exterior) { ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); } ambient_out.rgb *= ambient_out.a; @@ -1759,7 +1782,7 @@ vec4 fog_process(vec3 vertex) { } } - float fog_amount = 1.0 - exp(vertex.z * scene_data.fog_density); + float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); if (abs(scene_data.fog_height_density) > 0.001) { float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; @@ -1774,7 +1797,43 @@ vec4 fog_process(vec3 vertex) { return vec4(fog_color, fog_amount); } +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + +float blur_shadow(float shadow) { + return shadow; +#if 0 + //disabling for now, will investigate later + float interp_shadow = shadow; + if (gl_HelperInvocation) { + interp_shadow = -4.0; // technically anything below -4 will do but just to make sure + } + + uvec2 fc2 = uvec2(gl_FragCoord.xy); + interp_shadow -= dFdx(interp_shadow) * (float(fc2.x & 1) - 0.5); + interp_shadow -= dFdy(interp_shadow) * (float(fc2.y & 1) - 0.5); + + if (interp_shadow >= 0.0) { + shadow = interp_shadow; + } + return shadow; #endif +} + +#endif //!MODE_RENDER DEPTH void main() { #ifdef MODE_DUAL_PARABOLOID @@ -1802,9 +1861,7 @@ void main() { float clearcoat_gloss = 0.0; float anisotropy = 0.0; vec2 anisotropy_flow = vec2(1.0, 0.0); -#if defined(CUSTOM_FOG_USED) - vec4 custom_fog = vec4(0.0); -#endif + vec4 fog = vec4(0.0); #if defined(CUSTOM_RADIANCE_USED) vec4 custom_radiance = vec4(0.0); #endif @@ -1812,10 +1869,8 @@ void main() { vec4 custom_irradiance = vec4(0.0); #endif -#if defined(AO_USED) float ao = 1.0; float ao_light_affect = 0.0; -#endif float alpha = 1.0; @@ -1855,7 +1910,7 @@ void main() { vec3 normal_map = vec3(0.5); #endif - float normal_depth = 1.0; + float normal_map_depth = 1.0; vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size + scene_data.screen_pixel_size * 0.5; //account for center @@ -1931,7 +1986,7 @@ FRAGMENT_SHADER_CODE normal_map.xy = normal_map.xy * 2.0 - 1.0; normal_map.z = sqrt(max(0.0, 1.0 - dot(normal_map.xy, normal_map.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. - normal = normalize(mix(normal, tangent * normal_map.x + binormal * normal_map.y + normal * normal_map.z, normal_depth)); + normal = normalize(mix(normal, tangent * normal_map.x + binormal * normal_map.y + normal * normal_map.z, normal_map_depth)); #endif @@ -1953,77 +2008,147 @@ FRAGMENT_SHADER_CODE discard; } #endif + + /////////////////////// FOG ////////////////////// +#ifndef MODE_RENDER_DEPTH + +#ifndef CUSTOM_FOG_USED + // fog must be processed as early as possible and then packed. + // to maximize VGPR usage + // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. + + if (scene_data.fog_enabled) { + fog = fog_process(vertex); + } + +#ifndef LOW_END_MODE + if (scene_data.volumetric_fog_enabled) { + vec4 volumetric_fog = volumetric_fog_process(screen_uv, -vertex.z); + if (scene_data.fog_enabled) { + //must use the full blending equation here to blend fogs + vec4 res; + float sa = 1.0 - volumetric_fog.a; + res.a = fog.a * sa + volumetric_fog.a; + if (res.a == 0.0) { + res.rgb = vec3(0.0); + } else { + res.rgb = (fog.rgb * fog.a * sa + volumetric_fog.rgb * volumetric_fog.a) / res.a; + } + fog = res; + } else { + fog = volumetric_fog; + } + } +#endif //!LOW_END_MODE +#endif //!CUSTOM_FOG_USED + + uint fog_rg = packHalf2x16(fog.rg); + uint fog_ba = packHalf2x16(fog.ba); + +#endif //!MODE_RENDER_DEPTH + /////////////////////// DECALS //////////////////////////////// #ifndef MODE_RENDER_DEPTH - uvec4 cluster_cell = texture(usampler3D(cluster_texture, material_samplers[SAMPLER_NEAREST_CLAMP]), vec3(screen_uv, (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near))); + uvec2 cluster_pos = uvec2(gl_FragCoord.xy) >> scene_data.cluster_shift; + uint cluster_offset = (scene_data.cluster_width * cluster_pos.y + cluster_pos.x) * (scene_data.max_cluster_element_count_div_32 + 32); + + uint cluster_z = uint(clamp((-vertex.z / scene_data.z_far) * 32.0, 0.0, 31.0)); + //used for interpolating anything cluster related vec3 vertex_ddx = dFdx(vertex); vec3 vertex_ddy = dFdy(vertex); { // process decals - uint decal_count = cluster_cell.w >> CLUSTER_COUNTER_SHIFT; - uint decal_pointer = cluster_cell.w & CLUSTER_POINTER_MASK; + uint cluster_decal_offset = cluster_offset + scene_data.cluster_type_size * 2; - //do outside for performance and avoiding arctifacts + uint item_min; + uint item_max; + uint item_from; + uint item_to; - for (uint i = 0; i < decal_count; i++) { - uint decal_index = cluster_data.indices[decal_pointer + i]; - if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { - continue; //not masked - } + cluster_get_item_range(cluster_decal_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; - if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { - continue; //out of decal - } +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - //we need ddx/ddy for mipmaps, so simulate them - vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz; - vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz; + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_decal_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint decal_index = 32 * i + bit; - if (decals.data[decal_index].normal_fade > 0.0) { - fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5); - } + if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } - if (decals.data[decal_index].albedo_rect != vec4(0.0)) { - //has albedo - vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); - decal_albedo *= decals.data[decal_index].modulate; - decal_albedo.a *= fade; - albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); - - if (decals.data[decal_index].normal_rect != vec4(0.0)) { - vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; - decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software - decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy))); - //convert to view space, use xzy because y is up - decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; - - normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; + if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { + continue; //out of decal } - if (decals.data[decal_index].orm_rect != vec4(0.0)) { - vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; -#if defined(AO_USED) - ao = mix(ao, decal_orm.r, decal_albedo.a); -#endif - roughness = mix(roughness, decal_orm.g, decal_albedo.a); - metallic = mix(metallic, decal_orm.b, decal_albedo.a); + //we need ddx/ddy for mipmaps, so simulate them + vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz; + vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz; + + float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); + + if (decals.data[decal_index].normal_fade > 0.0) { + fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5); } - } - if (decals.data[decal_index].emission_rect != vec4(0.0)) { - //emission is additive, so its independent from albedo - emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + if (decals.data[decal_index].albedo_rect != vec4(0.0)) { + //has albedo + vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); + decal_albedo *= decals.data[decal_index].modulate; + decal_albedo.a *= fade; + albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); + + if (decals.data[decal_index].normal_rect != vec4(0.0)) { + vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; + decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software + decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy))); + //convert to view space, use xzy because y is up + decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; + + normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + } + + if (decals.data[decal_index].orm_rect != vec4(0.0)) { + vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; + ao = mix(ao, decal_orm.r, decal_albedo.a); + roughness = mix(roughness, decal_orm.g, decal_albedo.a); + metallic = mix(metallic, decal_orm.b, decal_albedo.a); + } + } + + if (decals.data[decal_index].emission_rect != vec4(0.0)) { + //emission is additive, so its independent from albedo + emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + } } } } + //pack albedo until needed again, saves 2 VGPRs in the meantime + #endif //not render depth /////////////////////// LIGHTING ////////////////////////////// @@ -2091,12 +2216,7 @@ FRAGMENT_SHADER_CODE //radiance - float specular_blob_intensity = 1.0; - -#if defined(SPECULAR_TOON) - specular_blob_intensity *= specular * 2.0; -#endif - +/// GI /// #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) #ifdef USE_LIGHTMAP @@ -2124,10 +2244,10 @@ FRAGMENT_SHADER_CODE } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); - uint ofs = instances.data[instance_index].gi_offset & 0xFFF; + uint ofs = instances.data[instance_index].gi_offset & 0xFFFF; vec3 uvw; uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy; - uvw.z = float((instances.data[instance_index].gi_offset >> 12) & 0xFF); + uvw.z = float((instances.data[instance_index].gi_offset >> 16) & 0xFFFF); if (uses_sh) { uvw.z *= 4.0; //SH textures use 4 times more data @@ -2263,17 +2383,17 @@ FRAGMENT_SHADER_CODE if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers - ivec2 coord; + vec2 coord; if (scene_data.gi_upscale_for_msaa) { - ivec2 base_coord = ivec2(gl_FragCoord.xy); - ivec2 closest_coord = base_coord; - float closest_ang = dot(normal, texelFetch(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0).xyz * 2.0 - 1.0); + vec2 base_coord = screen_uv; + vec2 closest_coord = base_coord; + float closest_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0.0).xyz * 2.0 - 1.0); for (int i = 0; i < 4; i++) { - const ivec2 neighbours[4] = ivec2[](ivec2(-1, 0), ivec2(1, 0), ivec2(0, -1), ivec2(0, 1)); - ivec2 neighbour_coord = base_coord + neighbours[i]; - float neighbour_ang = dot(normal, texelFetch(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0).xyz * 2.0 - 1.0); + const vec2 neighbours[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1)); + vec2 neighbour_coord = base_coord + neighbours[i] * scene_data.screen_pixel_size; + float neighbour_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0.0).xyz * 2.0 - 1.0); if (neighbour_ang > closest_ang) { closest_ang = neighbour_ang; closest_coord = neighbour_coord; @@ -2283,28 +2403,69 @@ FRAGMENT_SHADER_CODE coord = closest_coord; } else { - coord = ivec2(gl_FragCoord.xy); + coord = screen_uv; } - vec4 buffer_ambient = texelFetch(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0); - vec4 buffer_reflection = texelFetch(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0); + vec4 buffer_ambient = textureLod(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); + vec4 buffer_reflection = textureLod(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); ambient_light = mix(ambient_light, buffer_ambient.rgb, buffer_ambient.a); specular_light = mix(specular_light, buffer_reflection.rgb, buffer_reflection.a); } #endif +#ifndef LOW_END_MODE + if (scene_data.ssao_enabled) { + float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; + ao = min(ao, ssao); + ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); + } +#endif //LOW_END_MODE + { // process reflections vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); - uint reflection_probe_count = cluster_cell.z >> CLUSTER_COUNTER_SHIFT; - uint reflection_probe_pointer = cluster_cell.z & CLUSTER_POINTER_MASK; + uint cluster_reflection_offset = cluster_offset + scene_data.cluster_type_size * 3; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; - for (uint i = 0; i < reflection_probe_count; i++) { - uint ref_index = cluster_data.indices[reflection_probe_pointer + i]; - reflection_process(ref_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + cluster_get_item_range(cluster_reflection_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_reflection_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint reflection_index = 32 * i + bit; + + if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + } } if (reflection_accum.a > 0.0) { @@ -2318,6 +2479,16 @@ FRAGMENT_SHADER_CODE #endif } + //finalize ambient light here + ambient_light *= albedo.rgb; + ambient_light *= ao; + + // convert ao to direct light ao + ao = mix(1.0, ao, ao_light_affect); + + //this saves some VGPRs + vec3 f0 = F0(metallic, specular, albedo); + { #if defined(DIFFUSE_TOON) //simplify for toon, as @@ -2335,24 +2506,39 @@ FRAGMENT_SHADER_CODE float a004 = min(r.x * r.x, exp2(-9.28 * ndotv)) * r.x + r.y; vec2 env = vec2(-1.04, 1.04) * a004 + r.zw; - vec3 f0 = F0(metallic, specular, albedo); specular_light *= env.x * f0 + env.y; #endif } +#endif //GI !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + +#if !defined(MODE_RENDER_DEPTH) + //this saves some VGPRs + uint orms = packUnorm4x8(vec4(ao, roughness, metallic, specular)); +#endif + +// LIGHTING +#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + { //directional light - for (uint i = 0; i < scene_data.directional_light_count; i++) { + // Do shadow and lighting in two passes to reduce register pressure + uint shadow0 = 0; + uint shadow1 = 0; + + for (uint i = 0; i < 8; i++) { + if (i >= scene_data.directional_light_count) { + break; + } + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } - vec3 shadow_attenuation = vec3(1.0); - -#ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; -#endif + float shadow = 1.0; +#ifdef USE_SOFT_SHADOWS + //version with soft shadows, more expensive if (directional_lights.data[i].shadow_enabled) { float depth_z = -vertex.z; @@ -2366,8 +2552,6 @@ FRAGMENT_SHADER_CODE normal_bias -= light_dir * dot(light_dir, normal_bias); \ m_var.xyz += normal_bias; - float shadow = 0.0; - if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); @@ -2388,19 +2572,6 @@ FRAGMENT_SHADER_CODE shadow_color = directional_lights.data[i].shadow_color1.rgb; -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.x; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.x; - - transmittance_z = z - shadow_z; - } -#endif } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); @@ -2420,19 +2591,6 @@ FRAGMENT_SHADER_CODE } shadow_color = directional_lights.data[i].shadow_color2.rgb; -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.y; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.y; - - transmittance_z = z - shadow_z; - } -#endif } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); @@ -2452,19 +2610,6 @@ FRAGMENT_SHADER_CODE } shadow_color = directional_lights.data[i].shadow_color3.rgb; -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.z; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.z; - - transmittance_z = z - shadow_z; - } -#endif } else { vec4 v = vec4(vertex, 1.0); @@ -2485,20 +2630,6 @@ FRAGMENT_SHADER_CODE } shadow_color = directional_lights.data[i].shadow_color4.rgb; - -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.w; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.w; - - transmittance_z = z - shadow_z; - } -#endif } if (directional_lights.data[i].blend_splits) { @@ -2572,130 +2703,407 @@ FRAGMENT_SHADER_CODE shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance - shadow_attenuation = mix(shadow_color, vec3(1.0), shadow); +#undef BIAS_FUNC + } +#else + // Soft shadow disabled version + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -vertex.z; + + vec4 pssm_coord; + vec3 light_dir = directional_lights.data[i].direction; + vec3 base_normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))); + +#define BIAS_FUNC(m_var, m_idx) \ + m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ + vec3 normal_bias = base_normal_bias * directional_lights.data[i].shadow_normal_bias[m_idx]; \ + normal_bias -= light_dir * dot(light_dir, normal_bias); \ + m_var.xyz += normal_bias; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 0) + + pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x; + + transmittance_z = z - shadow_z; + } +#endif + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 1) + + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y; + + transmittance_z = z - shadow_z; + } +#endif + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 2) + + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z; + + transmittance_z = z - shadow_z; + } +#endif + + } else { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 3) + + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w; + + transmittance_z = z - shadow_z; + } +#endif + } + + pssm_coord /= pssm_coord.w; + + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + + if (directional_lights.data[i].blend_splits) { + float pssm_blend; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 1) + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z); + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 2) + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z); + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 3) + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z); + } else { + pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) + } + + pssm_coord /= pssm_coord.w; + + float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + shadow = mix(shadow, shadow2, pssm_blend); + } + + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance #undef BIAS_FUNC } +#endif + + if (i < 4) { + shadow0 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << (i * 8); + } else { + shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8); + } + } + + for (uint i = 0; i < 8; i++) { + if (i >= scene_data.directional_light_count) { + break; + } + + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + +#ifdef LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -vertex.z; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x; + + transmittance_z = z - shadow_z; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; + trans_coord /= trans_coord.w; - light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].size, directional_lights.data[i].color * directional_lights.data[i].energy, 1.0, shadow_attenuation, albedo, roughness, metallic, specular, directional_lights.data[i].specular * specular_blob_intensity, + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y; + + transmittance_z = z - shadow_z; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z; + + transmittance_z = z - shadow_z; + + } else { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w; + + transmittance_z = z - shadow_z; + } +#endif + + float shadow = 1.0; + + if (i < 4) { + shadow = float(shadow0 >> (i * 8) & 0xFF) / 255.0; + } else { + shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0; + } + + blur_shadow(shadow); + + light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, - transmittance_z, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, + transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim, rim_tint, + rim, rim_tint, albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - binormal, tangent, anisotropy, + binormal, tangent, anisotropy, +#endif +#ifdef USE_SOFT_SHADOW + directional_lights.data[i].size, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, - specular_light); + diffuse_light, + specular_light); + } } - } - { //omni lights + { //omni lights - uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT; - uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK; + uint cluster_omni_offset = cluster_offset; - for (uint i = 0; i < omni_light_count; i++) { - uint light_index = cluster_data.indices[omni_light_pointer + i]; + uint item_min; + uint item_max; + uint item_from; + uint item_to; - if (!bool(lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { - continue; //not masked - } + cluster_get_item_range(cluster_omni_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, albedo, roughness, metallic, specular, specular_blob_intensity, + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; + + if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + float shadow = light_process_omni_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); + + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, #endif #ifdef LIGHT_RIM_USED - rim, - rim_tint, + rim, + rim_tint, + albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, + tangent, binormal, anisotropy, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, specular_light); + diffuse_light, specular_light); + } + } } - } - { //spot lights - uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT; - uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK; + { //spot lights - for (uint i = 0; i < spot_light_count; i++) { - uint light_index = cluster_data.indices[spot_light_pointer + i]; + uint cluster_spot_offset = cluster_offset + scene_data.cluster_type_size; - if (!bool(lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { - continue; //not masked - } + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_spot_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, albedo, roughness, metallic, specular, specular_blob_intensity, +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + + uint light_index = 32 * i + bit; + + if (!bool(spot_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + float shadow = light_process_spot_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); + + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, #endif #ifdef LIGHT_RIM_USED - rim, - rim_tint, + rim, + rim_tint, + albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, + tangent, binormal, anisotropy, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, specular_light); + diffuse_light, specular_light); + } + } } - } #ifdef USE_SHADOW_TO_OPACITY - alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); + alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); #if defined(ALPHA_SCISSOR_USED) - if (alpha < alpha_scissor) { - discard; - } + if (alpha < alpha_scissor) { + discard; + } #endif // ALPHA_SCISSOR_USED #ifdef USE_OPAQUE_PREPASS - if (alpha < opaque_prepass_threshold) { - discard; - } + if (alpha < opaque_prepass_threshold) { + discard; + } #endif // USE_OPAQUE_PREPASS @@ -2707,173 +3115,149 @@ FRAGMENT_SHADER_CODE #ifdef MODE_RENDER_SDF - { - vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz; - ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size)); - - uint albedo16 = 0x1; //solid flag - albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11; - albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6; - albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1; - - imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16)); - - uint facing_bits = 0; - const vec3 aniso_dir[6] = vec3[]( - vec3(1, 0, 0), - vec3(0, 1, 0), - vec3(0, 0, 1), - vec3(-1, 0, 0), - vec3(0, -1, 0), - vec3(0, 0, -1)); - - vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp); - - float closest_dist = -1e20; - - for (uint i = 0; i < 6; i++) { - float d = dot(cam_normal, aniso_dir[i]); - if (d > closest_dist) { - closest_dist = d; - facing_bits = (1 << i); + { + vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz; + ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size)); + + uint albedo16 = 0x1; //solid flag + albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11; + albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6; + albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1; + + imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16)); + + uint facing_bits = 0; + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp); + + float closest_dist = -1e20; + + for (uint i = 0; i < 6; i++) { + float d = dot(cam_normal, aniso_dir[i]); + if (d > closest_dist) { + closest_dist = d; + facing_bits = (1 << i); + } } - } - imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits + imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits - if (length(emission) > 0.001) { - float lumas[6]; - vec3 light_total = vec3(0); + if (length(emission) > 0.001) { + float lumas[6]; + vec3 light_total = vec3(0); - for (int i = 0; i < 6; i++) { - float strength = max(0.0, dot(cam_normal, aniso_dir[i])); - vec3 light = emission * strength; - light_total += light; - lumas[i] = max(light.r, max(light.g, light.b)); - } + for (int i = 0; i < 6; i++) { + float strength = max(0.0, dot(cam_normal, aniso_dir[i])); + vec3 light = emission * strength; + light_total += light; + lumas[i] = max(light.r, max(light.g, light.b)); + } - float luma_total = max(light_total.r, max(light_total.g, light_total.b)); + float luma_total = max(light_total.r, max(light_total.g, light_total.b)); - uint light_aniso = 0; + uint light_aniso = 0; - for (int i = 0; i < 6; i++) { - light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); - } + for (int i = 0; i < 6; i++) { + light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); + } - //compress to RGBE9995 to save space + //compress to RGBE9995 to save space - const float pow2to9 = 512.0f; - const float B = 15.0f; - const float N = 9.0f; - const float LN2 = 0.6931471805599453094172321215; + const float pow2to9 = 512.0f; + const float B = 15.0f; + const float N = 9.0f; + const float LN2 = 0.6931471805599453094172321215; - float cRed = clamp(light_total.r, 0.0, 65408.0); - float cGreen = clamp(light_total.g, 0.0, 65408.0); - float cBlue = clamp(light_total.b, 0.0, 65408.0); + float cRed = clamp(light_total.r, 0.0, 65408.0); + float cGreen = clamp(light_total.g, 0.0, 65408.0); + float cBlue = clamp(light_total.b, 0.0, 65408.0); - float cMax = max(cRed, max(cGreen, cBlue)); + float cMax = max(cRed, max(cGreen, cBlue)); - float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; - float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); - float exps = expp + 1.0f; + float exps = expp + 1.0f; - if (0.0 <= sMax && sMax < pow2to9) { - exps = expp; - } + if (0.0 <= sMax && sMax < pow2to9) { + exps = expp; + } - float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); - float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); - float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); - //store as 8985 to have 2 extra neighbour bits - uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); + float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); + float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); + float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); + //store as 8985 to have 2 extra neighbour bits + uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); - imageStore(emission_grid, grid_pos, uvec4(light_rgbe)); - imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso)); + imageStore(emission_grid, grid_pos, uvec4(light_rgbe)); + imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso)); + } } - } #endif #ifdef MODE_RENDER_MATERIAL - albedo_output_buffer.rgb = albedo; - albedo_output_buffer.a = alpha; + albedo_output_buffer.rgb = albedo; + albedo_output_buffer.a = alpha; - normal_output_buffer.rgb = normal * 0.5 + 0.5; - normal_output_buffer.a = 0.0; - depth_output_buffer.r = -vertex.z; + normal_output_buffer.rgb = normal * 0.5 + 0.5; + normal_output_buffer.a = 0.0; + depth_output_buffer.r = -vertex.z; -#if defined(AO_USED) - orm_output_buffer.r = ao; -#else - orm_output_buffer.r = 0.0; -#endif - orm_output_buffer.g = roughness; - orm_output_buffer.b = metallic; - orm_output_buffer.a = sss_strength; + orm_output_buffer.r = ao; + orm_output_buffer.g = roughness; + orm_output_buffer.b = metallic; + orm_output_buffer.a = sss_strength; - emission_output_buffer.rgb = emission; - emission_output_buffer.a = 0.0; + emission_output_buffer.rgb = emission; + emission_output_buffer.a = 0.0; #endif #ifdef MODE_RENDER_NORMAL_ROUGHNESS - normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); + normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); #ifdef MODE_RENDER_GIPROBE - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; - uint index2 = instances.data[instance_index].gi_offset >> 16; - giprobe_buffer.x = index1 & 0xFF; - giprobe_buffer.y = index2 & 0xFF; - } else { - giprobe_buffer.x = 0xFF; - giprobe_buffer.y = 0xFF; - } + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + uint index2 = instances.data[instance_index].gi_offset >> 16; + giprobe_buffer.x = index1 & 0xFF; + giprobe_buffer.y = index2 & 0xFF; + } else { + giprobe_buffer.x = 0xFF; + giprobe_buffer.y = 0xFF; + } #endif -#endif //MODE_RENDER_NORMAL +#endif //MODE_RENDER_NORMAL_ROUGHNESS //nothing happens, so a tree-ssa optimizer will result in no fragment shader :) #else - specular_light *= scene_data.reflection_multiplier; - ambient_light *= albedo; //ambient must be multiplied by albedo at the end - -//ambient occlusion -#if defined(AO_USED) - -#ifndef LOW_END_MODE - if (scene_data.ssao_enabled && scene_data.ssao_ao_affect > 0.0) { - float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; - ao = mix(ao, min(ao, ssao), scene_data.ssao_ao_affect); - ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); - } -#endif //LOW_END_MODE - - ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao); - ao_light_affect = mix(1.0, ao, ao_light_affect); - specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect); - diffuse_light = mix(scene_data.ao_color.rgb, diffuse_light, ao_light_affect); -#else - -#ifndef LOW_END_MODE - if (scene_data.ssao_enabled) { - float ao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; - ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao); - float ao_light_affect = mix(1.0, ao, scene_data.ssao_light_affect); - specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect); - diffuse_light = mix(scene_data.ao_color.rgb, diffuse_light, ao_light_affect); - } -#endif //LOW_END_MODE + // multiply by albedo + diffuse_light *= albedo; // ambient must be multiplied by albedo at the end -#endif // AO_USED + // apply direct light AO + ao = unpackUnorm4x8(orms).x; + specular_light *= ao; + diffuse_light *= ao; - // base color remapping - diffuse_light *= 1.0 - metallic; // TODO: avoid all diffuse and ambient light calculations when metallic == 1 up to this point + // apply metallic + metallic = unpackUnorm4x8(orms).z; + diffuse_light *= 1.0 - metallic; ambient_light *= 1.0 - metallic; + //restore fog + fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba)); + #ifdef MODE_MULTIPLE_RENDER_TARGETS #ifdef MODE_UNSHADED @@ -2889,25 +3273,8 @@ FRAGMENT_SHADER_CODE specular_buffer = vec4(specular_light, metallic); #endif - // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. - if (scene_data.fog_enabled) { - vec4 fog = fog_process(vertex); - diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); - specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); - } - -#ifndef LOW_END_MODE - if (scene_data.volumetric_fog_enabled) { - vec4 fog = volumetric_fog_process(screen_uv, -vertex.z); - diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); - specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); - } -#endif // LOW_END_MODE - -#if defined(CUSTOM_FOG_USED) - diffuse_buffer.rgb = mix(diffuse_buffer.rgb, custom_fog.rgb, custom_fog.a); - specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), custom_fog.a); -#endif //CUSTOM_FOG_USED + diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); + specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); #else //MODE_MULTIPLE_RENDER_TARGETS @@ -2919,22 +3286,10 @@ FRAGMENT_SHADER_CODE #endif //USE_NO_SHADING // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. - if (scene_data.fog_enabled) { - vec4 fog = fog_process(vertex); - frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); - } -#ifndef LOW_END_MODE - if (scene_data.volumetric_fog_enabled) { - vec4 fog = volumetric_fog_process(screen_uv, -vertex.z); - frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); - } -#endif - -#if defined(CUSTOM_FOG_USED) - frag_color.rgb = mix(frag_color.rgb, custom_fog.rgb, custom_fog.a); -#endif //CUSTOM_FOG_USED + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + ; #endif //MODE_MULTIPLE_RENDER_TARGETS #endif //MODE_RENDER_DEPTH -} + } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index 17ed22f58a..d78890fa9e 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -3,6 +3,15 @@ #define MAX_GI_PROBES 8 +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +#define USE_SUBGROUPS + +#endif + #include "cluster_data_inc.glsl" #if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) @@ -13,8 +22,9 @@ layout(push_constant, binding = 0, std430) uniform DrawCall { uint instance_index; - uint pad; //16 bits minimum size - vec2 bake_uv2_offset; //used for bake to uv2, ignored otherwise + uint uv_offset; + uint pad0; + uint pad1; } draw_call; @@ -33,91 +43,13 @@ draw_call; #define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 #define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 -layout(set = 0, binding = 1) uniform sampler material_samplers[12]; - -layout(set = 0, binding = 2) uniform sampler shadow_sampler; - #define SDFGI_MAX_CASCADES 8 -layout(set = 0, binding = 3, std140) uniform SceneData { - mat4 projection_matrix; - mat4 inv_projection_matrix; - - mat4 camera_matrix; - mat4 inv_camera_matrix; - - vec2 viewport_size; - vec2 screen_pixel_size; - - //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted - vec4 directional_penumbra_shadow_kernel[32]; - vec4 directional_soft_shadow_kernel[32]; - vec4 penumbra_shadow_kernel[32]; - vec4 soft_shadow_kernel[32]; - - uint directional_penumbra_shadow_samples; - uint directional_soft_shadow_samples; - uint penumbra_shadow_samples; - uint soft_shadow_samples; - - vec4 ambient_light_color_energy; - - float ambient_color_sky_mix; - bool use_ambient_light; - bool use_ambient_cubemap; - bool use_reflection_cubemap; - - mat3 radiance_inverse_xform; - - vec2 shadow_atlas_pixel_size; - vec2 directional_shadow_pixel_size; - - uint directional_light_count; - float dual_paraboloid_side; - float z_far; - float z_near; - - bool ssao_enabled; - float ssao_light_affect; - float ssao_ao_affect; - bool roughness_limiter_enabled; - - float roughness_limiter_amount; - float roughness_limiter_limit; - uvec2 roughness_limiter_pad; - - vec4 ao_color; - - mat4 sdf_to_bounds; - - ivec3 sdf_offset; - bool material_uv2_mode; - - ivec3 sdf_size; - bool gi_upscale_for_msaa; - - bool volumetric_fog_enabled; - float volumetric_fog_inv_length; - float volumetric_fog_detail_spread; - uint volumetric_fog_pad; - - bool fog_enabled; - float fog_density; - float fog_height; - float fog_height_density; - - vec3 fog_light_color; - float fog_sun_scatter; - - float fog_aerial_perspective; - - float time; - float reflection_multiplier; // one normally, zero when rendering reflections +/* Set 1: Base Pass (never changes) */ - bool pancake_shadows; -} +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; -scene_data; +layout(set = 0, binding = 2) uniform sampler shadow_sampler; #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) #define INSTANCE_FLAGS_USE_SDFGI (1 << 7) @@ -134,33 +66,24 @@ scene_data; #define INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK 0x7 #define INSTANCE_FLAGS_SKELETON (1 << 19) +#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) -struct InstanceData { - mat4 transform; - mat4 normal_transform; - uint flags; - uint instance_uniforms_ofs; //base offset in global buffer for instance variables - uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint layer_mask; - vec4 lightmap_uv_scale; -}; - -layout(set = 0, binding = 4, std430) restrict readonly buffer Instances { - InstanceData data[]; +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { + LightData data[]; } -instances; +omni_lights; -layout(set = 0, binding = 5, std430) restrict readonly buffer Lights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { LightData data[]; } -lights; +spot_lights; -layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { +layout(set = 0, binding = 5) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -layout(set = 0, binding = 7, std140) uniform DirectionalLights { +layout(set = 0, binding = 6, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; @@ -172,40 +95,29 @@ struct Lightmap { mat3 normal_xform; }; -layout(set = 0, binding = 10, std140) restrict readonly buffer Lightmaps { +layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps { Lightmap data[]; } lightmaps; -layout(set = 0, binding = 11) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; - struct LightmapCapture { vec4 sh[9]; }; -layout(set = 0, binding = 12, std140) restrict readonly buffer LightmapCaptures { +layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { LightmapCapture data[]; } lightmap_captures; -layout(set = 0, binding = 13) uniform texture2D decal_atlas; -layout(set = 0, binding = 14) uniform texture2D decal_atlas_srgb; +layout(set = 0, binding = 9) uniform texture2D decal_atlas; +layout(set = 0, binding = 10) uniform texture2D decal_atlas_srgb; -layout(set = 0, binding = 15, std430) restrict readonly buffer Decals { +layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { DecalData data[]; } decals; -layout(set = 0, binding = 16) uniform utexture3D cluster_texture; - -layout(set = 0, binding = 17, std430) restrict readonly buffer ClusterData { - uint indices[]; -} -cluster_data; - -layout(set = 0, binding = 18) uniform texture2D directional_shadow_atlas; - -layout(set = 0, binding = 19, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableData { vec4 data[]; } global_variables; @@ -219,7 +131,7 @@ struct SDFGIProbeCascadeData { float to_cell; // 1/bounds * grid_size }; -layout(set = 0, binding = 20, std140) uniform SDFGI { +layout(set = 0, binding = 13, std140) uniform SDFGI { vec3 grid_size; uint max_cascades; @@ -249,38 +161,140 @@ sdfgi; #endif //LOW_END_MODE -// decal atlas +/* Set 2: Render Pass (changes per render pass) */ + +layout(set = 1, binding = 0, std140) uniform SceneData { + mat4 projection_matrix; + mat4 inv_projection_matrix; + + mat4 camera_matrix; + mat4 inv_camera_matrix; + + vec2 viewport_size; + vec2 screen_pixel_size; + + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint max_cluster_element_count_div_32; + + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted + vec4 directional_penumbra_shadow_kernel[32]; + vec4 directional_soft_shadow_kernel[32]; + vec4 penumbra_shadow_kernel[32]; + vec4 soft_shadow_kernel[32]; + + uint directional_penumbra_shadow_samples; + uint directional_soft_shadow_samples; + uint penumbra_shadow_samples; + uint soft_shadow_samples; + + vec4 ambient_light_color_energy; + + float ambient_color_sky_mix; + bool use_ambient_light; + bool use_ambient_cubemap; + bool use_reflection_cubemap; -/* Set 1, Radiance */ + mat3 radiance_inverse_xform; + + vec2 shadow_atlas_pixel_size; + vec2 directional_shadow_pixel_size; + + uint directional_light_count; + float dual_paraboloid_side; + float z_far; + float z_near; + + bool ssao_enabled; + float ssao_light_affect; + float ssao_ao_affect; + bool roughness_limiter_enabled; + + float roughness_limiter_amount; + float roughness_limiter_limit; + uvec2 roughness_limiter_pad; + + vec4 ao_color; + + mat4 sdf_to_bounds; + + ivec3 sdf_offset; + bool material_uv2_mode; + + ivec3 sdf_size; + bool gi_upscale_for_msaa; + + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; + + bool fog_enabled; + float fog_density; + float fog_height; + float fog_height_density; + + vec3 fog_light_color; + float fog_sun_scatter; + + float fog_aerial_perspective; + + float time; + float reflection_multiplier; // one normally, zero when rendering reflections + + bool pancake_shadows; +} + +scene_data; + +struct InstanceData { + mat4 transform; + uint flags; + uint instance_uniforms_ofs; //base offset in global buffer for instance variables + uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint layer_mask; + vec4 lightmap_uv_scale; +}; + +layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer { + InstanceData data[]; +} +instances; #ifdef USE_RADIANCE_CUBEMAP_ARRAY -layout(set = 1, binding = 0) uniform textureCubeArray radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap; #else -layout(set = 1, binding = 0) uniform textureCube radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCube radiance_cubemap; #endif -/* Set 2, Reflection and Shadow Atlases (view dependent) */ +layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas; -layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas; +layout(set = 1, binding = 4) uniform texture2D shadow_atlas; -layout(set = 1, binding = 2) uniform texture2D shadow_atlas; +layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas; -#ifndef LOW_END_MODE -layout(set = 1, binding = 3) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; + +#ifndef LOW_END_MOD +layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; #endif -/* Set 3, Render Buffers */ +layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer { + uint data[]; +} +cluster_buffer; #ifdef MODE_RENDER_SDF -layout(r16ui, set = 1, binding = 4) uniform restrict writeonly uimage3D albedo_volume_grid; -layout(r32ui, set = 1, binding = 5) uniform restrict writeonly uimage3D emission_grid; -layout(r32ui, set = 1, binding = 6) uniform restrict writeonly uimage3D emission_aniso_grid; -layout(r32ui, set = 1, binding = 7) uniform restrict uimage3D geom_facing_grid; +layout(r16ui, set = 1, binding = 9) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 10) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 11) uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; //still need to be present for shaders that use it, so remap them to something #define depth_buffer shadow_atlas @@ -289,17 +303,17 @@ layout(r32ui, set = 1, binding = 7) uniform restrict uimage3D geom_facing_grid; #else -layout(set = 1, binding = 4) uniform texture2D depth_buffer; -layout(set = 1, binding = 5) uniform texture2D color_buffer; +layout(set = 1, binding = 9) uniform texture2D depth_buffer; +layout(set = 1, binding = 10) uniform texture2D color_buffer; #ifndef LOW_END_MODE -layout(set = 1, binding = 6) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 7) uniform texture2D ao_buffer; -layout(set = 1, binding = 8) uniform texture2D ambient_buffer; -layout(set = 1, binding = 9) uniform texture2D reflection_buffer; -layout(set = 1, binding = 10) uniform texture2DArray sdfgi_lightprobe_texture; -layout(set = 1, binding = 11) uniform texture3D sdfgi_occlusion_cascades; +layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 12) uniform texture2D ao_buffer; +layout(set = 1, binding = 13) uniform texture2D ambient_buffer; +layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { mat4 xform; @@ -317,22 +331,22 @@ struct GIProbeData { uint mipmaps; }; -layout(set = 1, binding = 12, std140) uniform GIProbes { +layout(set = 1, binding = 17, std140) uniform GIProbes { GIProbeData data[MAX_GI_PROBES]; } gi_probes; -layout(set = 1, binding = 13) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture; #endif // LOW_END_MODE #endif -/* Set 4 Skeleton & Instancing (Multimesh) */ +/* Set 2 Skeleton & Instancing (can change per item) */ layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { vec4 data[]; } transforms; -/* Set 5 User Material */ +/* Set 3 User Material */ diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl index 813ea29fa1..e4c3f3a84b 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl @@ -97,6 +97,8 @@ void main() { float blend = 0.0; #if 1 + // No interpolation + vec3 inv_dir = 1.0 / ray_dir; float rough = 0.5; @@ -161,114 +163,11 @@ void main() { hit_light *= (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); - if (blend > 0.0) { - light = mix(light, hit_light, blend); - blend = 0.0; - } else { - light = hit_light; - - //process blend - float blend_from = (float(params.probe_axis_size - 1) / 2.0) - 2.5; - float blend_to = blend_from + 2.0; - - vec3 cam_pos = params.cam_transform[3].xyz - cascades.data[i].offset; - cam_pos *= cascades.data[i].to_cell; - - pos += ray_dir * min(advance, max_advance); - vec3 inner_pos = pos - cam_pos; - - inner_pos = inner_pos * float(params.probe_axis_size - 1) / params.grid_size.x; - - float len = length(inner_pos); - - inner_pos = abs(normalize(inner_pos)); - len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z)); - - if (len >= blend_from) { - blend = smoothstep(blend_from, blend_to, len); - - pos /= cascades.data[i].to_cell; - pos += cascades.data[i].offset; - ray_pos = pos; - hit = false; //continue trace for blend - - continue; - } - } + light = hit_light; break; } - light = mix(light, vec3(0.0), blend); - -#else - - vec3 inv_dir = 1.0 / ray_dir; - - bool hit = false; - vec4 light_accum = vec4(0.0); - - float blend_size = (params.grid_size.x / float(params.probe_axis_size - 1)) * 0.5; - - float radius_sizes[MAX_CASCADES]; - for (uint i = 0; i < params.max_cascades; i++) { - radius_sizes[i] = (1.0 / cascades.data[i].to_cell) * (params.grid_size.x * 0.5 - blend_size); - } - - float max_distance = radius_sizes[params.max_cascades - 1]; - float advance = 0; - while (advance < max_distance) { - for (uint i = 0; i < params.max_cascades; i++) { - if (advance < radius_sizes[i]) { - vec3 pos = (ray_pos + ray_dir * advance) - cascades.data[i].offset; - pos *= cascades.data[i].to_cell * pos_to_uvw; - - float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), pos).r * 255.0 - 1.0; - - vec4 hit_light = vec4(0.0); - if (distance < 1.0) { - hit_light.a = max(0.0, 1.0 - distance); - hit_light.rgb = texture(sampler3D(light_cascades[i], linear_sampler), pos).rgb; - hit_light.rgb *= hit_light.a; - } - - distance /= cascades.data[i].to_cell; - - if (i < (params.max_cascades - 1)) { - pos = (ray_pos + ray_dir * advance) - cascades.data[i + 1].offset; - pos *= cascades.data[i + 1].to_cell * pos_to_uvw; - - float distance2 = texture(sampler3D(sdf_cascades[i + 1], linear_sampler), pos).r * 255.0 - 1.0; - - vec4 hit_light2 = vec4(0.0); - if (distance2 < 1.0) { - hit_light2.a = max(0.0, 1.0 - distance2); - hit_light2.rgb = texture(sampler3D(light_cascades[i + 1], linear_sampler), pos).rgb; - hit_light2.rgb *= hit_light2.a; - } - - float prev_radius = i == 0 ? 0.0 : radius_sizes[i - 1]; - float blend = (advance - prev_radius) / (radius_sizes[i] - prev_radius); - - distance2 /= cascades.data[i + 1].to_cell; - - hit_light = mix(hit_light, hit_light2, blend); - distance = mix(distance, distance2, blend); - } - - light_accum += hit_light; - advance += distance; - break; - } - } - - if (light_accum.a > 0.98) { - break; - } - } - - light = light_accum.rgb / light_accum.a; - #endif imageStore(screen_buffer, screen_pos, vec4(linear_to_srgb(light), 1.0)); diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl index 61e4bf5e18..5e8934adb4 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl @@ -67,8 +67,8 @@ struct Light { float attenuation; uint type; - float spot_angle; - float spot_attenuation; + float cos_spot_angle; + float inv_spot_attenuation; float radius; vec4 shadow_color; @@ -112,11 +112,23 @@ vec2 octahedron_encode(vec3 n) { return n.xy; } +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + void main() { uint voxel_index = uint(gl_GlobalInvocationID.x); //used for skipping voxels every N frames - voxel_index = params.process_offset + voxel_index * params.process_increment; + if (params.process_increment > 1) { + voxel_index *= params.process_increment; + voxel_index += params.process_offset; + } if (voxel_index >= dispatch_data.total_count) { return; @@ -134,10 +146,78 @@ void main() { uint voxel_albedo = process_voxels.data[voxel_index].albedo; vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F); - vec3 light_accum[6]; - + vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); uint valid_aniso = (voxel_albedo >> 15) & 0x3F; + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + // Add indirect light first, in order to save computation resources +#ifdef MODE_PROCESS_DYNAMIC + if (params.multibounce) { + vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; + ivec3 probe_base_pos = ivec3(pos); + + float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); + tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); + + tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); + + vec3 base_tex_posf = vec3(tex_pos); + vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); + vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + // Compute lightprobe texture position + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + + for (uint k = 0; k < 6; k++) { + if (bool(valid_aniso & (1 << k))) { + vec3 n = aniso_dir[k]; + float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); + + vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); + tex_posf.xy *= tex_pixel_size; + + vec3 pos_uvw = tex_posf; + pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; + pos_uvw.x += float(offset.z) * probe_uv_offset.z; + vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + light_accum[k] += indirect_light * weight; + weight_accum[k] += weight; + } + } + } + + for (uint k = 0; k < 6; k++) { + if (weight_accum[k] > 0.0) { + light_accum[k] /= weight_accum[k]; + light_accum[k] *= albedo; + } + } + } + +#endif + { uint rgbe = process_voxels.data[voxel_index].light; @@ -153,18 +233,10 @@ void main() { uint aniso = process_voxels.data[voxel_index].light_aniso; for (uint i = 0; i < 6; i++) { float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F); - light_accum[i] = l * strength; + light_accum[i] += l * strength; } } - const vec3 aniso_dir[6] = vec3[]( - vec3(1, 0, 0), - vec3(0, 1, 0), - vec3(0, 0, 1), - vec3(-1, 0, 0), - vec3(0, -1, 0), - vec3(0, 0, -1)); - // Raytrace light vec3 pos_to_uvw = 1.0 / params.grid_size; @@ -184,22 +256,26 @@ void main() { direction = normalize(rel_vec); light_distance = length(rel_vec); rel_vec.y /= params.y_mult; - attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation); + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); + } break; case LIGHT_TYPE_SPOT: { vec3 rel_vec = lights.data[i].position - position; direction = normalize(rel_vec); light_distance = length(rel_vec); rel_vec.y /= params.y_mult; - attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation); - - float angle = acos(dot(normalize(rel_vec), -lights.data[i].direction)); - if (angle > lights.data[i].spot_angle) { - attenuation = 0.0; - } else { - float d = clamp(angle / lights.data[i].spot_angle, 0, 1); - attenuation *= pow(1.0 - d, lights.data[i].spot_attenuation); + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); + + float cos_spot_angle = lights.data[i].cos_spot_angle; + float cos_angle = dot(-direction, lights.data[i].direction); + + if (cos_angle < cos_spot_angle) { + continue; } + + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[i].inv_spot_attenuation); } break; } @@ -282,65 +358,6 @@ void main() { } } - // Add indirect light - - if (params.multibounce) { - vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; - ivec3 probe_base_pos = ivec3(pos); - - vec4 probe_accum[6] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); - float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); - - ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); - tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); - - tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); - - vec3 base_tex_posf = vec3(tex_pos); - vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); - vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; - - for (uint j = 0; j < 8; j++) { - ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); - ivec3 probe_posi = probe_base_pos; - probe_posi += offset; - - // Compute weight - - vec3 probe_pos = vec3(probe_posi); - vec3 probe_to_pos = pos - probe_pos; - vec3 probe_dir = normalize(-probe_to_pos); - - // Compute lightprobe texture position - - vec3 trilinear = vec3(1.0) - abs(probe_to_pos); - - for (uint k = 0; k < 6; k++) { - if (bool(valid_aniso & (1 << k))) { - vec3 n = aniso_dir[k]; - float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); - - vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); - tex_posf.xy *= tex_pixel_size; - - vec3 pos_uvw = tex_posf; - pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; - pos_uvw.x += float(offset.z) * probe_uv_offset.z; - vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0); - - probe_accum[k] += indirect_light * weight; - weight_accum[k] += weight; - } - } - } - - for (uint k = 0; k < 6; k++) { - if (weight_accum[k] > 0.0) { - light_accum[k] += probe_accum[k].rgb * albedo / weight_accum[k]; - } - } - } - // Store the light in the light texture float lumas[6]; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl index d516ab22c3..007e4c113a 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl @@ -39,8 +39,11 @@ layout(rgba32i, set = 0, binding = 13) uniform restrict iimage2D lightprobe_aver layout(rgba16f, set = 0, binding = 14) uniform restrict writeonly image2DArray lightprobe_ambient_texture; +#ifdef USE_CUBEMAP_ARRAY +layout(set = 1, binding = 0) uniform textureCubeArray sky_irradiance; +#else layout(set = 1, binding = 0) uniform textureCube sky_irradiance; - +#endif layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; #define HISTORY_BITS 10 @@ -136,12 +139,24 @@ uint rgbe_encode(vec3 color) { return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); } +struct SH { +#if (SH_SIZE == 16) + float c[48]; +#else + float c[28]; +#endif +}; + +shared SH sh_accum[64]; //8x8 + void main() { ivec2 pos = ivec2(gl_GlobalInvocationID.xy); if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing return; } + uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8; + #ifdef MODE_PROCESS float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; @@ -154,27 +169,9 @@ void main() { vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; vec3 pos_to_uvw = 1.0 / params.grid_size; - vec4 probe_sh_accum[SH_SIZE] = vec4[]( - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0) -#if (SH_SIZE == 16) - , - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0) -#endif - ); + for (uint i = 0; i < SH_SIZE * 3; i++) { + sh_accum[probe_index].c[i] = 0.0; + } // quickly ensure each probe has a different "offset" for the vogel function, based on integer world position uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell)); @@ -195,14 +192,12 @@ void main() { vec3 inv_dir = 1.0 / ray_dir; bool hit = false; - vec3 hit_normal; - vec3 hit_light; - vec3 hit_aniso0; - vec3 hit_aniso1; + uint hit_cascade; float bias = params.ray_bias; vec3 abs_ray_dir = abs(ray_dir); ray_pos += ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) * bias / cascades.data[params.cascade].to_cell; + vec3 uvw; for (uint j = params.cascade; j < params.max_cascades; j++) { //convert to local bounds @@ -221,14 +216,12 @@ void main() { float advance = 0.0; - vec3 uvw; - while (advance < max_advance) { //read how much to advance from SDF uvw = (pos + ray_dir * advance) * pos_to_uvw; float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; - if (distance < 0.001) { + if (distance < 0.05) { //consider hit hit = true; break; @@ -238,17 +231,7 @@ void main() { } if (hit) { - const float EPSILON = 0.001; - hit_normal = normalize(vec3( - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); - - hit_light = texture(sampler3D(light_cascades[j], linear_sampler), uvw).rgb; - vec4 aniso0 = texture(sampler3D(aniso0_cascades[j], linear_sampler), uvw); - hit_aniso0 = aniso0.rgb; - hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[j], linear_sampler), uvw).rg); - + hit_cascade = j; break; } @@ -261,11 +244,32 @@ void main() { vec4 light; if (hit) { - //one liner magic - light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); - light.a = 1.0; + //avoid reading different texture from different threads + for (uint j = params.cascade; j < params.max_cascades; j++) { + if (j == hit_cascade) { + const float EPSILON = 0.001; + vec3 hit_normal = normalize(vec3( + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + + vec3 hit_light = texture(sampler3D(light_cascades[hit_cascade], linear_sampler), uvw).rgb; + vec4 aniso0 = texture(sampler3D(aniso0_cascades[hit_cascade], linear_sampler), uvw); + vec3 hit_aniso0 = aniso0.rgb; + vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[hit_cascade], linear_sampler), uvw).rg); + + //one liner magic + light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); + light.a = 1.0; + } + } + } else if (params.sky_mode == SKY_MODE_SKY) { +#ifdef USE_CUBEMAP_ARRAY + light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(ray_dir, 0.0), 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates +#else light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), ray_dir, 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates +#endif light.rgb *= params.sky_energy; light.a = 0.0; @@ -278,33 +282,33 @@ void main() { } vec3 ray_dir2 = ray_dir * ray_dir; - float c[SH_SIZE] = float[]( - - 0.282095, //l0 - 0.488603 * ray_dir.y, //l1n1 - 0.488603 * ray_dir.z, //l1n0 - 0.488603 * ray_dir.x, //l1p1 - 1.092548 * ray_dir.x * ray_dir.y, //l2n2 - 1.092548 * ray_dir.y * ray_dir.z, //l2n1 - 0.315392 * (3.0 * ray_dir2.z - 1.0), //l20 - 1.092548 * ray_dir.x * ray_dir.z, //l2p1 - 0.546274 * (ray_dir2.x - ray_dir2.y) //l2p2 + +#define SH_ACCUM(m_idx, m_value) \ + { \ + vec3 l = light.rgb * (m_value); \ + sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \ + sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \ + sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \ + } + SH_ACCUM(0, 0.282095); //l0 + SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1 + SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0 + SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1 + SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2 + SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1 + SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20 + SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1 + SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2 #if (SH_SIZE == 16) - , - 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y), - 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z, - 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z), - 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z), - 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z), - 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z, - 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y) + SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y)); + SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z); + SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z)); + SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z); + SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)); #endif - ); - - for (uint j = 0; j < SH_SIZE; j++) { - probe_sh_accum[j] += light * c[j]; - } } for (uint i = 0; i < SH_SIZE; i++) { @@ -312,7 +316,7 @@ void main() { ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index)); ivec2 average_pos = prev_pos.xy; - vec4 value = probe_sh_accum[i] * 4.0 / float(params.ray_count); + vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count); ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average @@ -344,37 +348,11 @@ void main() { ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1); ivec2 local_pos = pos % OCT_SIZE; - //fill the spherical harmonic - vec4 sh[SH_SIZE]; - - for (uint i = 0; i < SH_SIZE; i++) { - // store in history texture - ivec2 average_pos = sh_pos + ivec2(0, i); - ivec4 average = imageLoad(lightprobe_average_texture, average_pos); - - sh[i] = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); - } - //compute the octahedral normal for this texel vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE)); - /* + // read the spherical harmonic - const float c1 = 0.429043; - const float c2 = 0.511664; - const float c3 = 0.743125; - const float c4 = 0.886227; - const float c5 = 0.247708; - vec4 light = (c1 * sh[8] * (normal.x * normal.x - normal.y * normal.y) + - c3 * sh[6] * normal.z * normal.z + - c4 * sh[0] - - c5 * sh[6] + - 2.0 * c1 * sh[4] * normal.x * normal.y + - 2.0 * c1 * sh[7] * normal.x * normal.z + - 2.0 * c1 * sh[5] * normal.y * normal.z + - 2.0 * c2 * sh[3] * normal.x + - 2.0 * c2 * sh[1] * normal.y + - 2.0 * c2 * sh[2] * normal.z); -*/ + vec3 normal2 = normal * normal; float c[SH_SIZE] = float[]( @@ -426,7 +404,14 @@ void main() { vec3 radiance = vec3(0.0); for (uint i = 0; i < SH_SIZE; i++) { - vec3 m = sh[i].rgb * c[i] * 4.0; + // store in history texture + ivec2 average_pos = sh_pos + ivec2(0, i); + ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + + vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + + vec3 m = sh.rgb * c[i] * 4.0; + irradiance += m * l_mult[i]; radiance += m; } @@ -515,13 +500,15 @@ void main() { //can't scroll, must look for position in parent cascade //to global coords - float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + float cell_to_probe = float(params.grid_size.x / float(params.probe_axis_size - 1)); + + float probe_cell_size = cell_to_probe / cascades.data[params.cascade].to_cell; vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; //to parent local coords + float probe_cell_size_next = cell_to_probe / cascades.data[params.cascade + 1].to_cell; probe_pos -= cascades.data[params.cascade + 1].offset; - probe_pos *= cascades.data[params.cascade + 1].to_cell; - probe_pos = probe_pos * float(params.probe_axis_size - 1) / float(params.grid_size.x); + probe_pos /= probe_cell_size_next; ivec3 probe_posi = ivec3(probe_pos); //add up all light, no need to use occlusion here, since occlusion will do its work afterwards @@ -574,20 +561,28 @@ void main() { } } else { - // clear and let it re-raytrace, only for the last cascade, which happens very un-often - //scroll + //scroll at the edge of the highest cascade, just copy what is there, + //since its the closest we have anyway + for (uint j = 0; j < params.history_size; j++) { + ivec2 tex_pos; + tex_pos = probe_cell.xy; + tex_pos.x += probe_cell.z * int(params.probe_axis_size); + for (int i = 0; i < SH_SIZE; i++) { // copy from history texture + ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); - imageStore(lightprobe_history_scroll_texture, dst_pos, ivec4(0)); + ivec4 value = imageLoad(lightprobe_history_texture, dst_pos); + imageStore(lightprobe_history_scroll_texture, dst_pos, value); } } for (int i = 0; i < SH_SIZE; i++) { // copy from average texture - ivec2 dst_pos = ivec2(pos.x, pos.y * SH_SIZE + i); - imageStore(lightprobe_average_scroll_texture, dst_pos, ivec4(0)); + ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); + ivec4 average = imageLoad(lightprobe_average_texture, spos); + imageStore(lightprobe_average_scroll_texture, spos, average); } } diff --git a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl deleted file mode 100644 index 29443ae7db..0000000000 --- a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl +++ /dev/null @@ -1,105 +0,0 @@ -#[compute] - -#version 450 - -VERSION_DEFINES - -#define BLOCK_SIZE 8 - -layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; - -#ifdef MODE_REDUCE - -shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; -const uint swizzle_table[BLOCK_SIZE] = uint[](0, 4, 2, 6, 1, 5, 3, 7); -const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3); - -#endif - -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_depth; -layout(r32f, set = 0, binding = 1) uniform restrict writeonly image2D dst_depth; - -layout(push_constant, binding = 1, std430) uniform Params { - ivec2 source_size; - ivec2 source_offset; - uint min_size; - uint gaussian_kernel_version; - ivec2 filter_dir; -} -params; - -void main() { -#ifdef MODE_REDUCE - - uvec2 pos = gl_LocalInvocationID.xy; - - ivec2 image_offset = params.source_offset; - ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy); - uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x]; - tmp_data[dst_t] = imageLoad(source_depth, min(image_pos, params.source_size - ivec2(1))).r; - ivec2 image_size = params.source_size; - - uint t = pos.y * BLOCK_SIZE + pos.x; - - //neighbours - uint size = BLOCK_SIZE; - - do { - groupMemoryBarrier(); - barrier(); - - size >>= 1; - image_size >>= 1; - image_offset >>= 1; - - if (all(lessThan(pos, uvec2(size)))) { - uint nx = t + size; - uint ny = t + (BLOCK_SIZE * size); - uint nxy = ny + size; - - tmp_data[t] += tmp_data[nx]; - tmp_data[t] += tmp_data[ny]; - tmp_data[t] += tmp_data[nxy]; - tmp_data[t] /= 4.0; - } - - } while (size > params.min_size); - - if (all(lessThan(pos, uvec2(size)))) { - image_pos = ivec2(unswizzle_table[size + pos.x], unswizzle_table[size + pos.y]); - image_pos += image_offset + ivec2(gl_WorkGroupID.xy) * int(size); - - image_size = max(ivec2(1), image_size); //in case image size became 0 - - if (all(lessThan(image_pos, uvec2(image_size)))) { - imageStore(dst_depth, image_pos, vec4(tmp_data[t])); - } - } -#endif - -#ifdef MODE_FILTER - - ivec2 image_pos = params.source_offset + ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(image_pos, params.source_size))) { - return; - } - - ivec2 clamp_min = ivec2(params.source_offset); - ivec2 clamp_max = ivec2(params.source_size) - 1; - - //gaussian kernel, size 9, sigma 4 - const int kernel_size = 9; - const float gaussian_kernel[kernel_size * 3] = float[]( - 0.000229, 0.005977, 0.060598, 0.241732, 0.382928, 0.241732, 0.060598, 0.005977, 0.000229, - 0.028532, 0.067234, 0.124009, 0.179044, 0.20236, 0.179044, 0.124009, 0.067234, 0.028532, - 0.081812, 0.101701, 0.118804, 0.130417, 0.134535, 0.130417, 0.118804, 0.101701, 0.081812); - float accum = 0.0; - for (int i = 0; i < kernel_size; i++) { - ivec2 ofs = clamp(image_pos + params.filter_dir * (i - kernel_size / 2), clamp_min, clamp_max); - accum += imageLoad(source_depth, ofs).r * gaussian_kernel[params.gaussian_kernel_version + i]; - } - - imageStore(dst_depth, image_pos, vec4(accum)); - -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/ssao.glsl index 315ef8fa13..231f8f91ec 100644 --- a/servers/rendering/renderer_rd/shaders/ssao.glsl +++ b/servers/rendering/renderer_rd/shaders/ssao.glsl @@ -88,7 +88,7 @@ counter; layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; // This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, binding = 3, std430) uniform Params { ivec2 screen_size; int pass; int quality; diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl index 13b162f0c9..e7ba8feb80 100644 --- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl +++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl @@ -4,6 +4,15 @@ VERSION_DEFINES +/* Do not use subgroups here, seems there is not much advantage and causes glitches +#extension GL_KHR_shader_subgroup_ballot: enable +#extension GL_KHR_shader_subgroup_arithmetic: enable + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) +#define USE_SUBGROUPS +#endif +*/ + #if defined(MODE_FOG) || defined(MODE_FILTER) layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; @@ -23,22 +32,25 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; layout(set = 0, binding = 1) uniform texture2D shadow_atlas; layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas; -layout(set = 0, binding = 3, std430) restrict readonly buffer Lights { +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { LightData data[]; } -lights; +omni_lights; -layout(set = 0, binding = 4, std140) uniform DirectionalLights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { + LightData data[]; +} +spot_lights; + +layout(set = 0, binding = 5, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; -layout(set = 0, binding = 5) uniform utexture3D cluster_texture; - -layout(set = 0, binding = 6, std430) restrict readonly buffer ClusterData { - uint indices[]; +layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer { + uint data[]; } -cluster_data; +cluster_buffer; layout(set = 0, binding = 7) uniform sampler linear_sampler; @@ -132,7 +144,7 @@ layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture; #endif //SDFGI -layout(push_constant, binding = 0, std430) uniform Params { +layout(set = 0, binding = 14, std140) uniform Params { vec2 fog_frustum_size_begin; vec2 fog_frustum_size_end; @@ -150,12 +162,24 @@ layout(push_constant, binding = 0, std430) uniform Params { float detail_spread; float gi_inject; uint max_gi_probes; - uint pad; + uint cluster_type_size; + + vec2 screen_size; + uint cluster_shift; + uint cluster_width; + + uint max_cluster_element_count_div_32; + bool use_temporal_reprojection; + uint temporal_frame; + float temporal_blend; mat3x4 cam_rotation; + mat4 to_prev_view; } params; +layout(set = 0, binding = 15) uniform texture3D prev_density_texture; + float get_depth_at_pos(float cell_depth_size, int z) { float d = float(z) * cell_depth_size + cell_depth_size * 0.5; //center of voxels d = pow(d, params.detail_spread); @@ -169,6 +193,51 @@ vec3 hash3f(uvec3 x) { return vec3(x & 0xFFFFF) / vec3(float(0xFFFFF)); } +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + +#define TEMPORAL_FRAMES 16 + +const vec3 halton_map[TEMPORAL_FRAMES] = vec3[]( + vec3(0.5, 0.33333333, 0.2), + vec3(0.25, 0.66666667, 0.4), + vec3(0.75, 0.11111111, 0.6), + vec3(0.125, 0.44444444, 0.8), + vec3(0.625, 0.77777778, 0.04), + vec3(0.375, 0.22222222, 0.24), + vec3(0.875, 0.55555556, 0.44), + vec3(0.0625, 0.88888889, 0.64), + vec3(0.5625, 0.03703704, 0.84), + vec3(0.3125, 0.37037037, 0.08), + vec3(0.8125, 0.7037037, 0.28), + vec3(0.1875, 0.14814815, 0.48), + vec3(0.6875, 0.48148148, 0.68), + vec3(0.4375, 0.81481481, 0.88), + vec3(0.9375, 0.25925926, 0.12), + vec3(0.03125, 0.59259259, 0.32)); + void main() { vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); @@ -184,6 +253,12 @@ void main() { //posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0; vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels + + uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); vec3 view_pos; @@ -191,6 +266,47 @@ void main() { view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; view_pos.y = -view_pos.y; + vec4 reprojected_density = vec4(0.0); + float reproject_amount = 0.0; + + if (params.use_temporal_reprojection) { + vec3 prev_view = (params.to_prev_view * vec4(view_pos, 1.0)).xyz; + //undo transform into prev view + prev_view.y = -prev_view.y; + //z back to unit size + prev_view.z /= -params.fog_frustum_end; + //xy back to unit size + prev_view.xy /= mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(prev_view.z)); + prev_view.xy = prev_view.xy * 0.5 + 0.5; + //z back to unspread value + prev_view.z = pow(prev_view.z, 1.0 / params.detail_spread); + + if (all(greaterThan(prev_view, vec3(0.0))) && all(lessThan(prev_view, vec3(1.0)))) { + //reprojectinon fits + + reprojected_density = textureLod(sampler3D(prev_density_texture, linear_sampler), prev_view, 0.0); + reproject_amount = params.temporal_blend; + + // Since we can reproject, now we must jitter the current view pos. + // This is done here because cells that can't reproject should not jitter. + + fog_unit_pos = posf * fog_cell_size + fog_cell_size * halton_map[params.temporal_frame]; //center of voxels, offset by halton table + + screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + cluster_pos = screen_pos >> params.cluster_shift; + cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); + + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + } + } + + uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0)); + vec3 total_light = params.light_color; float total_density = params.base_density; @@ -257,108 +373,160 @@ void main() { //compute lights from cluster - vec3 cluster_pos; - cluster_pos.xy = fog_unit_pos.xy; - cluster_pos.z = clamp((abs(view_pos.z) - params.z_near) / (params.z_far - params.z_near), 0.0, 1.0); + { //omni lights - uvec4 cluster_cell = texture(usampler3D(cluster_texture, linear_sampler), cluster_pos); + uint cluster_omni_offset = cluster_offset; - uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT; - uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK; + uint item_min; + uint item_max; + uint item_from; + uint item_to; - for (uint i = 0; i < omni_light_count; i++) { - uint light_index = cluster_data.indices[omni_light_pointer + i]; + cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - vec3 light_pos = lights.data[i].position; - float d = distance(lights.data[i].position, view_pos) * lights.data[i].inv_radius; - vec3 shadow_attenuation = vec3(1.0); +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - if (d < 1.0) { - vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); - vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular); + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x); + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; - vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; + //if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled); + vec3 light_pos = omni_lights.data[light_index].position; + float d = distance(omni_lights.data[light_index].position, view_pos); + float shadow_attenuation = 1.0; - if (shadow_color_enabled.a > 0.5) { - //has shadow - vec4 v = vec4(view_pos, 1.0); + if (d * omni_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); - vec4 splane = (lights.data[i].shadow_matrix * v); - float shadow_len = length(splane.xyz); //need to remember shadow len from here + vec3 light = omni_lights.data[light_index].color / M_PI; - splane.xyz = normalize(splane.xyz); - vec4 clamp_rect = lights.data[i].atlas_rect; + if (omni_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); - if (splane.z >= 0.0) { - splane.z += 1.0; + vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); + float shadow_len = length(splane.xyz); //need to remember shadow len from here - clamp_rect.y += clamp_rect.w; + splane.xyz = normalize(splane.xyz); + vec4 clamp_rect = omni_lights.data[light_index].atlas_rect; - } else { - splane.z = 1.0 - splane.z; - } + if (splane.z >= 0.0) { + splane.z += 1.0; + + clamp_rect.y += clamp_rect.w; - splane.xy /= splane.z; + } else { + splane.z = 1.0 - splane.z; + } - splane.xy = splane.xy * 0.5 + 0.5; - splane.z = shadow_len * lights.data[i].inv_radius; - splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - splane.w = 1.0; //needed? i think it should be 1 already + splane.xy /= splane.z; - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * omni_lights.data[light_index].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already - shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + } + total_light += light * attenuation * shadow_attenuation; + } } - total_light += light * attenuation * shadow_attenuation; } } - uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT; - uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK; + { //spot lights - for (uint i = 0; i < spot_light_count; i++) { - uint light_index = cluster_data.indices[spot_light_pointer + i]; + uint cluster_spot_offset = cluster_offset + params.cluster_type_size; - vec3 light_pos = lights.data[i].position; - vec3 light_rel_vec = lights.data[i].position - view_pos; - float d = length(light_rel_vec) * lights.data[i].inv_radius; - vec3 shadow_attenuation = vec3(1.0); + uint item_min; + uint item_max; + uint item_from; + uint item_to; - if (d < 1.0) { - vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); - vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular); + cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x); +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - vec3 spot_dir = lights.data[i].direction; - vec2 spot_att_angle = unpackHalf2x16(lights.data[i].cone_attenuation_angle); - float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y)); - attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x); + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled); + //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} - if (shadow_color_enabled.a > 0.5) { - //has shadow - vec4 v = vec4(view_pos, 1.0); + uint light_index = 32 * i + bit; - vec4 splane = (lights.data[i].shadow_matrix * v); - splane /= splane.w; + vec3 light_pos = spot_lights.data[light_index].position; + vec3 light_rel_vec = spot_lights.data[light_index].position - view_pos; + float d = length(light_rel_vec); + float shadow_attenuation = 1.0; - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); + if (d * spot_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation); - shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); - } + vec3 spot_dir = spot_lights.data[light_index].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[light_index].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[light_index].cone_angle)); + attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation); + + vec3 light = spot_lights.data[light_index].color / M_PI; + + if (spot_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); + + vec4 splane = (spot_lights.data[light_index].shadow_matrix * v); + splane /= splane.w; - total_light += light * attenuation * shadow_attenuation; + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / spot_lights.data[light_index].inv_radius * spot_lights.data[light_index].shadow_volumetric_fog_fade); + } + + total_light += light * attenuation * shadow_attenuation; + } + } } } @@ -461,7 +629,11 @@ void main() { #endif - imageStore(density_map, pos, vec4(total_light, total_density)); + vec4 final_density = vec4(total_light, total_density); + + final_density = mix(final_density, reprojected_density, reproject_amount); + + imageStore(density_map, pos, final_density); #endif #ifdef MODE_FOG |