diff options
Diffstat (limited to 'servers/rendering/renderer_rd')
70 files changed, 52938 insertions, 0 deletions
diff --git a/servers/rendering/renderer_rd/SCsub b/servers/rendering/renderer_rd/SCsub new file mode 100644 index 0000000000..6a2e682c67 --- /dev/null +++ b/servers/rendering/renderer_rd/SCsub @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +Import("env") + +env.add_source_files(env.servers_sources, "*.cpp") + +SConscript("shaders/SCsub") diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp new file mode 100644 index 0000000000..0fdd864d47 --- /dev/null +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -0,0 +1,555 @@ +/*************************************************************************/ +/* cluster_builder_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "cluster_builder_rd.h" +#include "servers/rendering/rendering_device.h" +#include "servers/rendering/rendering_server_globals.h" + +ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() { + RD::VertexFormatID vertex_format; + + { + Vector<RD::VertexAttribute> attributes; + { + RD::VertexAttribute va; + va.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; + va.stride = sizeof(float) * 3; + attributes.push_back(va); + } + vertex_format = RD::get_singleton()->vertex_format_create(attributes); + } + + { + Vector<String> versions; + versions.push_back(""); + cluster_render.cluster_render_shader.initialize(versions); + cluster_render.shader_version = cluster_render.cluster_render_shader.version_create(); + cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0); + cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + RD::PipelineMultisampleState ms; + ms.sample_count = RD::TEXTURE_SAMPLES_4; + cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + } + { + Vector<String> versions; + versions.push_back(""); + cluster_store.cluster_store_shader.initialize(versions); + cluster_store.shader_version = cluster_store.cluster_store_shader.version_create(); + cluster_store.shader = cluster_store.cluster_store_shader.version_get_shader(cluster_store.shader_version, 0); + cluster_store.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_store.shader); + } + { + Vector<String> versions; + versions.push_back(""); + cluster_debug.cluster_debug_shader.initialize(versions); + cluster_debug.shader_version = cluster_debug.cluster_debug_shader.version_create(); + cluster_debug.shader = cluster_debug.cluster_debug_shader.version_get_shader(cluster_debug.shader_version, 0); + cluster_debug.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_debug.shader); + } + + { // SPHERE + static const uint32_t icosphere_vertex_count = 42; + static const float icosphere_vertices[icosphere_vertex_count * 3] = { + 0, 0, -1, 0.7236073, -0.5257253, -0.4472195, -0.276388, -0.8506492, -0.4472199, -0.8944262, 0, -0.4472156, -0.276388, 0.8506492, -0.4472199, 0.7236073, 0.5257253, -0.4472195, 0.276388, -0.8506492, 0.4472199, -0.7236073, -0.5257253, 0.4472195, -0.7236073, 0.5257253, 0.4472195, 0.276388, 0.8506492, 0.4472199, 0.8944262, 0, 0.4472156, 0, 0, 1, -0.1624555, -0.4999952, -0.8506544, 0.4253227, -0.3090114, -0.8506542, 0.2628688, -0.8090116, -0.5257377, 0.8506479, 0, -0.5257359, 0.4253227, 0.3090114, -0.8506542, -0.5257298, 0, -0.8506517, -0.6881894, -0.4999969, -0.5257362, -0.1624555, 0.4999952, -0.8506544, -0.6881894, 0.4999969, -0.5257362, 0.2628688, 0.8090116, -0.5257377, 0.9510579, -0.3090126, 0, 0.9510579, 0.3090126, 0, 0, -1, 0, 0.5877856, -0.8090167, 0, -0.9510579, -0.3090126, 0, -0.5877856, -0.8090167, 0, -0.5877856, 0.8090167, 0, -0.9510579, 0.3090126, 0, 0.5877856, 0.8090167, 0, 0, 1, 0, 0.6881894, -0.4999969, 0.5257362, -0.2628688, -0.8090116, 0.5257377, -0.8506479, 0, 0.5257359, -0.2628688, 0.8090116, 0.5257377, 0.6881894, 0.4999969, 0.5257362, 0.1624555, -0.4999952, 0.8506544, 0.5257298, 0, 0.8506517, -0.4253227, -0.3090114, 0.8506542, -0.4253227, 0.3090114, 0.8506542, 0.1624555, 0.4999952, 0.8506544 + }; + static const uint32_t icosphere_triangle_count = 80; + static const uint32_t icosphere_triangle_indices[icosphere_triangle_count * 3] = { + 0, 13, 12, 1, 13, 15, 0, 12, 17, 0, 17, 19, 0, 19, 16, 1, 15, 22, 2, 14, 24, 3, 18, 26, 4, 20, 28, 5, 21, 30, 1, 22, 25, 2, 24, 27, 3, 26, 29, 4, 28, 31, 5, 30, 23, 6, 32, 37, 7, 33, 39, 8, 34, 40, 9, 35, 41, 10, 36, 38, 38, 41, 11, 38, 36, 41, 36, 9, 41, 41, 40, 11, 41, 35, 40, 35, 8, 40, 40, 39, 11, 40, 34, 39, 34, 7, 39, 39, 37, 11, 39, 33, 37, 33, 6, 37, 37, 38, 11, 37, 32, 38, 32, 10, 38, 23, 36, 10, 23, 30, 36, 30, 9, 36, 31, 35, 9, 31, 28, 35, 28, 8, 35, 29, 34, 8, 29, 26, 34, 26, 7, 34, 27, 33, 7, 27, 24, 33, 24, 6, 33, 25, 32, 6, 25, 22, 32, 22, 10, 32, 30, 31, 9, 30, 21, 31, 21, 4, 31, 28, 29, 8, 28, 20, 29, 20, 3, 29, 26, 27, 7, 26, 18, 27, 18, 2, 27, 24, 25, 6, 24, 14, 25, 14, 1, 25, 22, 23, 10, 22, 15, 23, 15, 5, 23, 16, 21, 5, 16, 19, 21, 19, 4, 21, 19, 20, 4, 19, 17, 20, 17, 3, 20, 17, 18, 3, 17, 12, 18, 12, 2, 18, 15, 16, 5, 15, 13, 16, 13, 0, 16, 12, 14, 2, 12, 13, 14, 13, 1, 14 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * icosphere_vertex_count * 3); + copymem(vertex_data.ptrw(), icosphere_vertices, vertex_data.size()); + + sphere_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * icosphere_triangle_count * 3); + copymem(index_data.ptrw(), icosphere_triangle_indices, index_data.size()); + + sphere_index_buffer = RD::get_singleton()->index_buffer_create(icosphere_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(sphere_vertex_buffer); + + sphere_vertex_array = RD::get_singleton()->vertex_array_create(icosphere_vertex_count, vertex_format, buffers); + + sphere_index_array = RD::get_singleton()->index_array_create(sphere_index_buffer, 0, icosphere_triangle_count * 3); + + float min_d = 1e20; + for (uint32_t i = 0; i < icosphere_triangle_count; i++) { + Vector3 vertices[3]; + for (uint32_t j = 0; j < 3; j++) { + uint32_t index = icosphere_triangle_indices[i * 3 + j]; + for (uint32_t k = 0; k < 3; k++) { + vertices[j][k] = icosphere_vertices[index * 3 + k]; + } + } + Plane p(vertices[0], vertices[1], vertices[2]); + min_d = MIN(Math::abs(p.d), min_d); + } + sphere_overfit = 1.0 / min_d; + } + + { // CONE + static const uint32_t cone_vertex_count = 99; + static const float cone_vertices[cone_vertex_count * 3] = { + 0, 1, -1, 0.1950903, 0.9807853, -1, 0.3826835, 0.9238795, -1, 0.5555703, 0.8314696, -1, 0.7071068, 0.7071068, -1, 0.8314697, 0.5555702, -1, 0.9238795, 0.3826834, -1, 0.9807853, 0.1950903, -1, 1, 0, -1, 0.9807853, -0.1950902, -1, 0.9238796, -0.3826833, -1, 0.8314697, -0.5555702, -1, 0.7071068, -0.7071068, -1, 0.5555702, -0.8314697, -1, 0.3826833, -0.9238796, -1, 0.1950901, -0.9807853, -1, -3.25841e-7, -1, -1, -0.1950907, -0.9807852, -1, -0.3826839, -0.9238793, -1, -0.5555707, -0.8314693, -1, -0.7071073, -0.7071063, -1, -0.83147, -0.5555697, -1, -0.9238799, -0.3826827, -1, 0, 0, 0, -0.9807854, -0.1950894, -1, -1, 9.65599e-7, -1, -0.9807851, 0.1950913, -1, -0.9238791, 0.3826845, -1, -0.8314689, 0.5555713, -1, -0.7071059, 0.7071077, -1, -0.5555691, 0.8314704, -1, -0.3826821, 0.9238801, -1, -0.1950888, 0.9807856, -1 + }; + static const uint32_t cone_triangle_count = 62; + static const uint32_t cone_triangle_indices[cone_triangle_count * 3] = { + 0, 23, 1, 1, 23, 2, 2, 23, 3, 3, 23, 4, 4, 23, 5, 5, 23, 6, 6, 23, 7, 7, 23, 8, 8, 23, 9, 9, 23, 10, 10, 23, 11, 11, 23, 12, 12, 23, 13, 13, 23, 14, 14, 23, 15, 15, 23, 16, 16, 23, 17, 17, 23, 18, 18, 23, 19, 19, 23, 20, 20, 23, 21, 21, 23, 22, 22, 23, 24, 24, 23, 25, 25, 23, 26, 26, 23, 27, 27, 23, 28, 28, 23, 29, 29, 23, 30, 30, 23, 31, 31, 23, 32, 32, 23, 0, 7, 15, 24, 32, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 3, 6, 7, 3, 7, 8, 9, 9, 10, 7, 10, 11, 7, 11, 12, 15, 12, 13, 15, 13, 14, 15, 15, 16, 17, 17, 18, 19, 19, 20, 24, 20, 21, 24, 21, 22, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 30, 31, 32, 32, 1, 3, 15, 17, 24, 17, 19, 24, 24, 26, 32, 26, 28, 32, 28, 30, 32, 32, 3, 7, 7, 11, 15, 32, 7, 24 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * cone_vertex_count * 3); + copymem(vertex_data.ptrw(), cone_vertices, vertex_data.size()); + + cone_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * cone_triangle_count * 3); + copymem(index_data.ptrw(), cone_triangle_indices, index_data.size()); + + cone_index_buffer = RD::get_singleton()->index_buffer_create(cone_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(cone_vertex_buffer); + + cone_vertex_array = RD::get_singleton()->vertex_array_create(cone_vertex_count, vertex_format, buffers); + + cone_index_array = RD::get_singleton()->index_array_create(cone_index_buffer, 0, cone_triangle_count * 3); + + float min_d = 1e20; + for (uint32_t i = 0; i < cone_triangle_count; i++) { + Vector3 vertices[3]; + int32_t zero_index = -1; + for (uint32_t j = 0; j < 3; j++) { + uint32_t index = cone_triangle_indices[i * 3 + j]; + for (uint32_t k = 0; k < 3; k++) { + vertices[j][k] = cone_vertices[index * 3 + k]; + } + if (vertices[j] == Vector3()) { + zero_index = j; + } + } + + if (zero_index != -1) { + Vector3 a = vertices[(zero_index + 1) % 3]; + Vector3 b = vertices[(zero_index + 2) % 3]; + Vector3 c = a + Vector3(0, 0, 1); + Plane p(a, b, c); + min_d = MIN(Math::abs(p.d), min_d); + } + } + cone_overfit = 1.0 / min_d; + } + + { // BOX + static const uint32_t box_vertex_count = 8; + static const float box_vertices[box_vertex_count * 3] = { + -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, -1, 1, 1, 1 + }; + static const uint32_t box_triangle_count = 12; + static const uint32_t box_triangle_indices[box_triangle_count * 3] = { + 1, 2, 0, 3, 6, 2, 7, 4, 6, 5, 0, 4, 6, 0, 2, 3, 5, 7, 1, 3, 2, 3, 7, 6, 7, 5, 4, 5, 1, 0, 6, 4, 0, 3, 1, 5 + }; + + Vector<uint8_t> vertex_data; + vertex_data.resize(sizeof(float) * box_vertex_count * 3); + copymem(vertex_data.ptrw(), box_vertices, vertex_data.size()); + + box_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data); + + Vector<uint8_t> index_data; + index_data.resize(sizeof(uint32_t) * box_triangle_count * 3); + copymem(index_data.ptrw(), box_triangle_indices, index_data.size()); + + box_index_buffer = RD::get_singleton()->index_buffer_create(box_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data); + + Vector<RID> buffers; + buffers.push_back(box_vertex_buffer); + + box_vertex_array = RD::get_singleton()->vertex_array_create(box_vertex_count, vertex_format, buffers); + + box_index_array = RD::get_singleton()->index_array_create(box_index_buffer, 0, box_triangle_count * 3); + } +} +ClusterBuilderSharedDataRD::~ClusterBuilderSharedDataRD() { + RD::get_singleton()->free(sphere_vertex_buffer); + RD::get_singleton()->free(sphere_index_buffer); + RD::get_singleton()->free(cone_vertex_buffer); + RD::get_singleton()->free(cone_index_buffer); + RD::get_singleton()->free(box_vertex_buffer); + RD::get_singleton()->free(box_index_buffer); + + cluster_render.cluster_render_shader.version_free(cluster_render.shader_version); + cluster_store.cluster_store_shader.version_free(cluster_store.shader_version); + cluster_debug.cluster_debug_shader.version_free(cluster_debug.shader_version); +} + +///////////////////////////// + +void ClusterBuilderRD::_clear() { + if (cluster_buffer.is_null()) { + return; //nothing to clear + } + RD::get_singleton()->free(cluster_buffer); + RD::get_singleton()->free(cluster_render_buffer); + RD::get_singleton()->free(element_buffer); + cluster_buffer = RID(); + cluster_render_buffer = RID(); + element_buffer = RID(); + + memfree(render_elements); + + render_elements = nullptr; + render_element_max = 0; + render_element_count = 0; + + RD::get_singleton()->free(framebuffer); + framebuffer = RID(); + + cluster_render_uniform_set = RID(); + cluster_store_uniform_set = RID(); +} + +void ClusterBuilderRD::setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer) { + ERR_FAIL_COND(p_max_elements == 0); + ERR_FAIL_COND(p_screen_size.x < 1); + ERR_FAIL_COND(p_screen_size.y < 1); + + _clear(); + + screen_size = p_screen_size; + + cluster_screen_size.width = (p_screen_size.width - 1) / cluster_size + 1; + cluster_screen_size.height = (p_screen_size.height - 1) / cluster_size + 1; + + max_elements_by_type = p_max_elements; + if (max_elements_by_type % 32) { //need to be 32 aligned + max_elements_by_type += 32 - (max_elements_by_type % 32); + } + + cluster_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (max_elements_by_type / 32 + 32) * ELEMENT_TYPE_MAX * 4; + + render_element_max = max_elements_by_type * ELEMENT_TYPE_MAX; + + uint32_t element_tag_bits_size = render_element_max / 32; + uint32_t element_tag_depth_bits_size = render_element_max; + cluster_render_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (element_tag_bits_size + element_tag_depth_bits_size) * 4; // tag bits (element was used) and tag depth (depth range in which it was used) + + cluster_render_buffer = RD::get_singleton()->storage_buffer_create(cluster_render_buffer_size); + cluster_buffer = RD::get_singleton()->storage_buffer_create(cluster_buffer_size); + + render_elements = (RenderElementData *)memalloc(sizeof(RenderElementData *) * render_element_max); + render_element_count = 0; + + element_buffer = RD::get_singleton()->storage_buffer_create(sizeof(RenderElementData) * render_element_max); + + uint32_t div_value = 1 << divisor; + if (use_msaa) { + framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value, RD::TEXTURE_SAMPLES_4); + } else { + framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 1; + u.ids.push_back(state_uniform); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(element_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.ids.push_back(cluster_render_buffer); + uniforms.push_back(u); + } + + cluster_render_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_render.shader, 0); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(cluster_render_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(cluster_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.ids.push_back(element_buffer); + uniforms.push_back(u); + } + + cluster_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_store.shader, 0); + } + + if (p_color_buffer.is_valid()) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(cluster_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(p_color_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 3; + u.ids.push_back(p_depth_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 4; + u.ids.push_back(p_depth_buffer_sampler); + uniforms.push_back(u); + } + + debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_debug.shader, 0); + } else { + debug_uniform_set = RID(); + } +} + +void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y) { + view_xform = p_view_transform.affine_inverse(); + projection = p_cam_projection; + z_near = projection.get_z_near(); + z_far = projection.get_z_far(); + orthogonal = p_cam_projection.is_orthogonal(); + adjusted_projection = projection; + if (!orthogonal) { + adjusted_projection.adjust_perspective_znear(0.0001); + } + + CameraMatrix correction; + correction.set_depth_correction(p_flip_y); + projection = correction * projection; + adjusted_projection = correction * adjusted_projection; + + //reset counts + render_element_count = 0; + for (uint32_t i = 0; i < ELEMENT_TYPE_MAX; i++) { + cluster_count_by_type[i] = 0; + } +} + +void ClusterBuilderRD::bake_cluster() { + RENDER_TIMESTAMP(">Bake Cluster"); + + RD::get_singleton()->draw_command_begin_label("Bake Light Cluster"); + + //clear cluster buffer + RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + + if (render_element_count > 0) { + //clear render buffer + RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, RD::BARRIER_MASK_RASTER); + + { //fill state uniform + + StateUniform state; + + RendererStorageRD::store_camera(adjusted_projection, state.projection); + state.inv_z_far = 1.0 / z_far; + state.screen_to_clusters_shift = get_shift_from_power_of_2(cluster_size); + state.screen_to_clusters_shift -= divisor; //screen is smaller, shift one less + + state.cluster_screen_width = cluster_screen_size.x; + state.cluster_depth_offset = (render_element_max / 32); + state.cluster_data_size = state.cluster_depth_offset + render_element_max; + + RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + + //update instances + + RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + + RENDER_TIMESTAMP("Render Elements"); + + //render elements + { + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD); + ClusterBuilderSharedDataRD::ClusterRender::PushConstant push_constant = {}; + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shared->cluster_render.shader_pipelines[use_msaa ? ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_MSAA : ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_NORMAL]); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, cluster_render_uniform_set, 0); + + for (uint32_t i = 0; i < render_element_count;) { + push_constant.base_index = i; + switch (render_elements[i].type) { + case ELEMENT_TYPE_OMNI_LIGHT: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->sphere_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->sphere_index_array); + } break; + case ELEMENT_TYPE_SPOT_LIGHT: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->cone_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->cone_index_array); + } break; + case ELEMENT_TYPE_DECAL: + case ELEMENT_TYPE_REFLECTION_PROBE: { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->box_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->box_index_array); + } break; + } + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterRender::PushConstant)); + + uint32_t instances = 1; +#if 0 + for (uint32_t j = i+1; j < element_count; j++) { + if (elements[i].type!=elements[j].type) { + break; + } + instances++; + } +#endif + RD::get_singleton()->draw_list_draw(draw_list, true, instances); + i += instances; + } + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE); + } + //store elements + RENDER_TIMESTAMP("Pack Elements"); + + { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_store.shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cluster_store_uniform_set, 0); + + ClusterBuilderSharedDataRD::ClusterStore::PushConstant push_constant; + push_constant.cluster_render_data_size = render_element_max / 32 + render_element_max; + push_constant.max_render_element_count_div_32 = render_element_max / 32; + push_constant.cluster_screen_size[0] = cluster_screen_size.x; + push_constant.cluster_screen_size[1] = cluster_screen_size.y; + push_constant.render_element_count_div_32 = render_element_count > 0 ? (render_element_count - 1) / 32 + 1 : 0; + push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32; + push_constant.pad1 = 0; + push_constant.pad2 = 0; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1); + + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + } else { + RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + RENDER_TIMESTAMP("<Bake Cluster"); + RD::get_singleton()->draw_command_end_label(); +} + +void ClusterBuilderRD::debug(ElementType p_element) { + ERR_FAIL_COND(debug_uniform_set.is_null()); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_debug.shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, debug_uniform_set, 0); + + ClusterBuilderSharedDataRD::ClusterDebug::PushConstant push_constant; + push_constant.screen_size[0] = screen_size.x; + push_constant.screen_size[1] = screen_size.y; + push_constant.cluster_screen_size[0] = cluster_screen_size.x; + push_constant.cluster_screen_size[1] = cluster_screen_size.y; + push_constant.cluster_shift = get_shift_from_power_of_2(cluster_size); + push_constant.cluster_type = p_element; + push_constant.orthogonal = orthogonal; + push_constant.z_far = z_far; + push_constant.z_near = z_near; + push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1); + + RD::get_singleton()->compute_list_end(); +} + +RID ClusterBuilderRD::get_cluster_buffer() const { + return cluster_buffer; +} + +uint32_t ClusterBuilderRD::get_cluster_size() const { + return cluster_size; +} + +uint32_t ClusterBuilderRD::get_max_cluster_elements() const { + return max_elements_by_type; +} + +void ClusterBuilderRD::set_shared(ClusterBuilderSharedDataRD *p_shared) { + shared = p_shared; +} + +ClusterBuilderRD::ClusterBuilderRD() { + state_uniform = RD::get_singleton()->uniform_buffer_create(sizeof(StateUniform)); +} + +ClusterBuilderRD::~ClusterBuilderRD() { + _clear(); + RD::get_singleton()->free(state_uniform); +} diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.h b/servers/rendering/renderer_rd/cluster_builder_rd.h new file mode 100644 index 0000000000..dc1707b534 --- /dev/null +++ b/servers/rendering/renderer_rd/cluster_builder_rd.h @@ -0,0 +1,378 @@ +/*************************************************************************/ +/* cluster_builder_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef CLUSTER_BUILDER_RD_H +#define CLUSTER_BUILDER_RD_H + +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h" + +class ClusterBuilderSharedDataRD { + friend class ClusterBuilderRD; + + RID sphere_vertex_buffer; + RID sphere_vertex_array; + RID sphere_index_buffer; + RID sphere_index_array; + float sphere_overfit = 0.0; //because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area + + RID cone_vertex_buffer; + RID cone_vertex_array; + RID cone_index_buffer; + RID cone_index_array; + float cone_overfit = 0.0; //because an cone mesh is not a perfect sphere, we need to enlarge it to cover the actual cone area + + RID box_vertex_buffer; + RID box_vertex_array; + RID box_index_buffer; + RID box_index_array; + + enum Divisor { + DIVISOR_1, + DIVISOR_2, + DIVISOR_4, + }; + + struct ClusterRender { + struct PushConstant { + uint32_t base_index; + uint32_t pad0; + uint32_t pad1; + uint32_t pad2; + }; + + ClusterRenderShaderRD cluster_render_shader; + RID shader_version; + RID shader; + enum PipelineVersion { + PIPELINE_NORMAL, + PIPELINE_MSAA, + PIPELINE_MAX + }; + + RID shader_pipelines[PIPELINE_MAX]; + } cluster_render; + + struct ClusterStore { + struct PushConstant { + uint32_t cluster_render_data_size; // how much data for a single cluster takes + uint32_t max_render_element_count_div_32; //divided by 32 + uint32_t cluster_screen_size[2]; + uint32_t render_element_count_div_32; //divided by 32 + uint32_t max_cluster_element_count_div_32; //divided by 32 + uint32_t pad1; + uint32_t pad2; + }; + + ClusterStoreShaderRD cluster_store_shader; + RID shader_version; + RID shader; + RID shader_pipeline; + } cluster_store; + + struct ClusterDebug { + struct PushConstant { + uint32_t screen_size[2]; + uint32_t cluster_screen_size[2]; + + uint32_t cluster_shift; + uint32_t cluster_type; + float z_near; + float z_far; + + uint32_t orthogonal; + uint32_t max_cluster_element_count_div_32; + uint32_t pad1; + uint32_t pad2; + }; + + ClusterDebugShaderRD cluster_debug_shader; + RID shader_version; + RID shader; + RID shader_pipeline; + } cluster_debug; + +public: + ClusterBuilderSharedDataRD(); + ~ClusterBuilderSharedDataRD(); +}; + +class ClusterBuilderRD { +public: + enum LightType { + LIGHT_TYPE_OMNI, + LIGHT_TYPE_SPOT + }; + + enum BoxType { + BOX_TYPE_REFLECTION_PROBE, + BOX_TYPE_DECAL, + }; + + enum ElementType { + ELEMENT_TYPE_OMNI_LIGHT, + ELEMENT_TYPE_SPOT_LIGHT, + ELEMENT_TYPE_DECAL, + ELEMENT_TYPE_REFLECTION_PROBE, + ELEMENT_TYPE_MAX, + + }; + +private: + ClusterBuilderSharedDataRD *shared = nullptr; + + struct RenderElementData { + uint32_t type; //0-4 + uint32_t touches_near; + uint32_t touches_far; + uint32_t original_index; + float transform_inv[12]; //transposed transform for less space + float scale[3]; + uint32_t pad; + }; + + uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {}; + uint32_t max_elements_by_type = 0; + + RenderElementData *render_elements = nullptr; + uint32_t render_element_count = 0; + uint32_t render_element_max = 0; + + Transform view_xform; + CameraMatrix adjusted_projection; + CameraMatrix projection; + float z_far = 0; + float z_near = 0; + bool orthogonal = false; + + enum Divisor { + DIVISOR_1, + DIVISOR_2, + DIVISOR_4, + }; + + uint32_t cluster_size = 32; + bool use_msaa = true; + Divisor divisor = DIVISOR_4; + + Size2i screen_size; + Size2i cluster_screen_size; + + RID framebuffer; + RID cluster_render_buffer; //used for creating + RID cluster_buffer; //used for rendering + RID element_buffer; //used for storing, to hint element touches far plane or near plane + uint32_t cluster_render_buffer_size = 0; + uint32_t cluster_buffer_size = 0; + + RID cluster_render_uniform_set; + RID cluster_store_uniform_set; + + //persistent data + + void _clear(); + + struct StateUniform { + float projection[16]; + float inv_z_far; + uint32_t screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint32_t cluster_screen_width; // + uint32_t cluster_data_size; // how much data for a single cluster takes + uint32_t cluster_depth_offset; + uint32_t pad0; + uint32_t pad1; + uint32_t pad2; + }; + + RID state_uniform; + + RID debug_uniform_set; + +public: + void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer); + + void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y); + + _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) { + if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) { + return; //max number elements reached + } + if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) { + return; //max number elements reached + } + + RenderElementData &e = render_elements[render_element_count]; + + Transform xform = view_xform * p_transform; + + float radius = xform.basis.get_uniform_scale(); + if (radius > 0.98 || radius < 1.02) { + xform.basis.orthonormalize(); + } + + radius *= p_radius; + + if (p_type == LIGHT_TYPE_OMNI) { + radius *= shared->sphere_overfit; // overfit icosphere + + //omni + float depth = -xform.origin.z; + if (orthogonal) { + e.touches_near = (depth - radius) < z_near; + } else { + //contains camera inside light + float radius2 = radius * shared->sphere_overfit; // overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex) + e.touches_near = xform.origin.length_squared() < radius2 * radius2; + } + + e.touches_far = (depth + radius) > z_far; + e.scale[0] = radius; + e.scale[1] = radius; + e.scale[2] = radius; + e.type = ELEMENT_TYPE_OMNI_LIGHT; + e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]; + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++; + + } else { + //spot + radius *= shared->cone_overfit; // overfit icosphere + + real_t len = Math::tan(Math::deg2rad(p_spot_aperture)) * radius; + //approximate, probably better to use a cone support function + float max_d = -1e20; + float min_d = 1e20; +#define CONE_MINMAX(m_x, m_y) \ + { \ + float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \ + min_d = MIN(d, min_d); \ + max_d = MAX(d, max_d); \ + } + + CONE_MINMAX(1, 1); + CONE_MINMAX(-1, 1); + CONE_MINMAX(-1, -1); + CONE_MINMAX(1, -1); + + if (orthogonal) { + e.touches_near = min_d < z_near; + } else { + //contains camera inside light + Plane base_plane(xform.origin, -xform.basis.get_axis(Vector3::AXIS_Z)); + float dist = base_plane.distance_to(Vector3()); + if (dist >= 0 && dist < radius) { + //inside, check angle + float angle = Math::rad2deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_axis(Vector3::AXIS_Z)))); + e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit + } else { + e.touches_near = false; + } + } + + e.touches_far = max_d > z_far; + + e.scale[0] = len * shared->cone_overfit; + e.scale[1] = len * shared->cone_overfit; + e.scale[2] = radius; + + e.type = ELEMENT_TYPE_SPOT_LIGHT; + e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; //use omni since they share index + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++; + } + + render_element_count++; + } + + _FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform &p_transform, const Vector3 &p_half_extents) { + if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) { + return; //max number elements reached + } + if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) { + return; //max number elements reached + } + + RenderElementData &e = render_elements[render_element_count]; + Transform xform = view_xform * p_transform; + + //extract scale and scale the matrix by it, makes things simpler + Vector3 scale = p_half_extents; + for (uint32_t i = 0; i < 3; i++) { + float s = xform.basis.elements[i].length(); + scale[i] *= s; + xform.basis.elements[i] /= s; + }; + + float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale)); + float depth = -xform.origin.z; + + if (orthogonal) { + e.touches_near = depth - box_depth < z_near; + } else { + //contains camera inside box + Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs(); + e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z; + } + + e.touches_far = depth + box_depth > z_far; + + e.scale[0] = scale.x; + e.scale[1] = scale.y; + e.scale[2] = scale.z; + + e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE; + e.original_index = cluster_count_by_type[e.type]; + + RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv); + + cluster_count_by_type[e.type]++; + render_element_count++; + } + + void bake_cluster(); + void debug(ElementType p_element); + + RID get_cluster_buffer() const; + uint32_t get_cluster_size() const; + uint32_t get_max_cluster_elements() const; + + void set_shared(ClusterBuilderSharedDataRD *p_shared); + + ClusterBuilderRD(); + ~ClusterBuilderRD(); +}; + +#endif // CLUSTER_BUILDER_H diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp new file mode 100644 index 0000000000..bc304aedd8 --- /dev/null +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -0,0 +1,2009 @@ +/*************************************************************************/ +/* effects_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "effects_rd.h" + +#include "core/config/project_settings.h" +#include "core/math/math_defs.h" +#include "core/os/os.h" + +#include "thirdparty/misc/cubemap_coeffs.h" + +static _FORCE_INLINE_ void store_transform_3x3(const Basis &p_basis, float *p_array) { + p_array[0] = p_basis.elements[0][0]; + p_array[1] = p_basis.elements[1][0]; + p_array[2] = p_basis.elements[2][0]; + p_array[3] = 0; + p_array[4] = p_basis.elements[0][1]; + p_array[5] = p_basis.elements[1][1]; + p_array[6] = p_basis.elements[2][1]; + p_array[7] = 0; + p_array[8] = p_basis.elements[0][2]; + p_array[9] = p_basis.elements[1][2]; + p_array[10] = p_basis.elements[2][2]; + p_array[11] = 0; +} + +static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + p_array[i * 4 + j] = p_mtx.matrix[i][j]; + } + } +} + +RID EffectsRD::_get_uniform_set_from_image(RID p_image) { + if (image_to_uniform_set_cache.has(p_image)) { + RID uniform_set = image_to_uniform_set_cache[p_image]; + if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + return uniform_set; + } + } + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.ids.push_back(p_image); + uniforms.push_back(u); + //any thing with the same configuration (one texture in binding 0 for set 0), is good + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, luminance_reduce.shader.version_get_shader(luminance_reduce.shader_version, 0), 1); + + image_to_uniform_set_cache[p_image] = uniform_set; + + return uniform_set; +} + +RID EffectsRD::_get_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps) { + if (texture_to_uniform_set_cache.has(p_texture)) { + RID uniform_set = texture_to_uniform_set_cache[p_texture]; + if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + return uniform_set; + } + } + + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(p_use_mipmaps ? default_mipmap_sampler : default_sampler); + u.ids.push_back(p_texture); + uniforms.push_back(u); + //anything with the same configuration (one texture in binding 0 for set 0), is good + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, tonemap.shader.version_get_shader(tonemap.shader_version, 0), 0); + + texture_to_uniform_set_cache[p_texture] = uniform_set; + + return uniform_set; +} + +RID EffectsRD::_get_compute_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps) { + if (texture_to_compute_uniform_set_cache.has(p_texture)) { + RID uniform_set = texture_to_compute_uniform_set_cache[p_texture]; + if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + return uniform_set; + } + } + + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(p_use_mipmaps ? default_mipmap_sampler : default_sampler); + u.ids.push_back(p_texture); + uniforms.push_back(u); + //any thing with the same configuration (one texture in binding 0 for set 0), is good + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, luminance_reduce.shader.version_get_shader(luminance_reduce.shader_version, 0), 0); + + texture_to_compute_uniform_set_cache[p_texture] = uniform_set; + + return uniform_set; +} + +RID EffectsRD::_get_compute_uniform_set_from_texture_and_sampler(RID p_texture, RID p_sampler) { + TextureSamplerPair tsp; + tsp.texture = p_texture; + tsp.sampler = p_sampler; + + if (texture_sampler_to_compute_uniform_set_cache.has(tsp)) { + RID uniform_set = texture_sampler_to_compute_uniform_set_cache[tsp]; + if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + return uniform_set; + } + } + + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(p_sampler); + u.ids.push_back(p_texture); + uniforms.push_back(u); + //any thing with the same configuration (one texture in binding 0 for set 0), is good + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.blur_shader.version_get_shader(ssao.blur_shader_version, 0), 0); + + texture_sampler_to_compute_uniform_set_cache[tsp] = uniform_set; + + return uniform_set; +} + +RID EffectsRD::_get_compute_uniform_set_from_texture_pair(RID p_texture1, RID p_texture2, bool p_use_mipmaps) { + TexturePair tp; + tp.texture1 = p_texture1; + tp.texture2 = p_texture2; + + if (texture_pair_to_compute_uniform_set_cache.has(tp)) { + RID uniform_set = texture_pair_to_compute_uniform_set_cache[tp]; + if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + return uniform_set; + } + } + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(p_use_mipmaps ? default_mipmap_sampler : default_sampler); + u.ids.push_back(p_texture1); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 1; + u.ids.push_back(p_use_mipmaps ? default_mipmap_sampler : default_sampler); + u.ids.push_back(p_texture2); + uniforms.push_back(u); + } + //any thing with the same configuration (one texture in binding 0 for set 0), is good + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0), 1); + + texture_pair_to_compute_uniform_set_cache[tp] = uniform_set; + + return uniform_set; +} + +RID EffectsRD::_get_compute_uniform_set_from_image_pair(RID p_texture1, RID p_texture2) { + TexturePair tp; + tp.texture1 = p_texture1; + tp.texture2 = p_texture2; + + if (image_pair_to_compute_uniform_set_cache.has(tp)) { + RID uniform_set = image_pair_to_compute_uniform_set_cache[tp]; + if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + return uniform_set; + } + } + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.ids.push_back(p_texture1); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(p_texture2); + uniforms.push_back(u); + } + //any thing with the same configuration (one texture in binding 0 for set 0), is good + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0), 3); + + image_pair_to_compute_uniform_set_cache[tp] = uniform_set; + + return uniform_set; +} + +void EffectsRD::copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y, bool p_panorama) { + zeromem(©_to_fb.push_constant, sizeof(CopyToFbPushConstant)); + + copy_to_fb.push_constant.use_section = true; + copy_to_fb.push_constant.section[0] = p_uv_rect.position.x; + copy_to_fb.push_constant.section[1] = p_uv_rect.position.y; + copy_to_fb.push_constant.section[2] = p_uv_rect.size.x; + copy_to_fb.push_constant.section[3] = p_uv_rect.size.y; + + if (p_flip_y) { + copy_to_fb.push_constant.flip_y = true; + } + + RD::DrawListID draw_list = p_draw_list; + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, copy_to_fb.pipelines[p_panorama ? COPY_TO_FB_COPY_PANORAMA_TO_DP : COPY_TO_FB_COPY].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); + RD::get_singleton()->draw_list_set_push_constant(draw_list, ©_to_fb.push_constant, sizeof(CopyToFbPushConstant)); + RD::get_singleton()->draw_list_draw(draw_list, true); +} + +void EffectsRD::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y, bool p_force_luminance, bool p_alpha_to_zero, bool p_srgb, RID p_secondary) { + zeromem(©_to_fb.push_constant, sizeof(CopyToFbPushConstant)); + + if (p_flip_y) { + copy_to_fb.push_constant.flip_y = true; + } + if (p_force_luminance) { + copy_to_fb.push_constant.force_luminance = true; + } + if (p_alpha_to_zero) { + copy_to_fb.push_constant.alpha_to_zero = true; + } + if (p_srgb) { + copy_to_fb.push_constant.srgb = true; + } + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, p_rect); + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, copy_to_fb.pipelines[p_secondary.is_valid() ? COPY_TO_FB_COPY2 : COPY_TO_FB_COPY].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_source_rd_texture), 0); + if (p_secondary.is_valid()) { + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_secondary), 1); + } + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); + RD::get_singleton()->draw_list_set_push_constant(draw_list, ©_to_fb.push_constant, sizeof(CopyToFbPushConstant)); + RD::get_singleton()->draw_list_draw(draw_list, true); + RD::get_singleton()->draw_list_end(); +} + +void EffectsRD::copy_to_rect(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, bool p_force_luminance, bool p_all_source, bool p_8_bit_dst, bool p_alpha_to_one) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + if (p_flip_y) { + copy.push_constant.flags |= COPY_FLAG_FLIP_Y; + } + + if (p_force_luminance) { + copy.push_constant.flags |= COPY_FLAG_FORCE_LUMINANCE; + } + + if (p_all_source) { + copy.push_constant.flags |= COPY_FLAG_ALL_SOURCE; + } + + if (p_alpha_to_one) { + copy.push_constant.flags |= COPY_FLAG_ALPHA_TO_ONE; + } + + copy.push_constant.section[0] = 0; + copy.push_constant.section[1] = 0; + copy.push_constant.section[2] = p_rect.size.width; + copy.push_constant.section[3] = p_rect.size.height; + copy.push_constant.target[0] = p_rect.position.x; + copy.push_constant.target[1] = p_rect.position.y; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8_bit_dst ? COPY_MODE_SIMPLY_COPY_8BIT : COPY_MODE_SIMPLY_COPY]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama, const Size2i &p_panorama_size, float p_lod, bool p_is_array) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + + copy.push_constant.section[0] = 0; + copy.push_constant.section[1] = 0; + copy.push_constant.section[2] = p_panorama_size.width; + copy.push_constant.section[3] = p_panorama_size.height; + copy.push_constant.target[0] = 0; + copy.push_constant.target[1] = 0; + copy.push_constant.camera_z_far = p_lod; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_is_array ? COPY_MODE_CUBE_ARRAY_TO_PANORAMA : COPY_MODE_CUBE_TO_PANORAMA]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_cube), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_panorama), 3); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_panorama_size.width, p_panorama_size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + if (p_flip_y) { + copy.push_constant.flags |= COPY_FLAG_FLIP_Y; + } + + copy.push_constant.section[0] = 0; + copy.push_constant.section[1] = 0; + copy.push_constant.section[2] = p_rect.size.width; + copy.push_constant.section[3] = p_rect.size.height; + copy.push_constant.target[0] = p_rect.position.x; + copy.push_constant.target[1] = p_rect.position.y; + copy.push_constant.camera_z_far = p_z_far; + copy.push_constant.camera_z_near = p_z_near; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_LINEARIZE_DEPTH]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + if (p_flip_y) { + copy.push_constant.flags |= COPY_FLAG_FLIP_Y; + } + + copy.push_constant.section[0] = 0; + copy.push_constant.section[1] = 0; + copy.push_constant.section[2] = p_rect.size.width; + copy.push_constant.section[3] = p_rect.size.height; + copy.push_constant.target[0] = p_rect.position.x; + copy.push_constant.target[1] = p_rect.position.y; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_SIMPLY_COPY_DEPTH]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + + copy.push_constant.section[0] = 0; + copy.push_constant.section[1] = 0; + copy.push_constant.section[2] = p_region.size.width; + copy.push_constant.section[3] = p_region.size.height; + copy.push_constant.target[0] = p_region.position.x; + copy.push_constant.target[1] = p_region.position.y; + copy.push_constant.set_color[0] = p_color.r; + copy.push_constant.set_color[1] = p_color.g; + copy.push_constant.set_color[2] = p_color.b; + copy.push_constant.set_color[3] = p_color.a; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_SET_COLOR_8BIT : COPY_MODE_SET_COLOR]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + + uint32_t base_flags = 0; + copy.push_constant.section[0] = p_region.position.x; + copy.push_constant.section[1] = p_region.position.y; + copy.push_constant.section[2] = p_region.size.width; + copy.push_constant.section[3] = p_region.size.height; + + //HORIZONTAL + RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_back_texture), 3); + + copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL; + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //VERTICAL + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_back_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_texture), 3); + + copy.push_constant.flags = base_flags; + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const Size2i &p_size, float p_strength, bool p_high_quality, bool p_first_pass, float p_luminance_cap, float p_exposure, float p_bloom, float p_hdr_bleed_treshold, float p_hdr_bleed_scale, RID p_auto_exposure, float p_auto_exposure_grey) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + + CopyMode copy_mode = p_first_pass && p_auto_exposure.is_valid() ? COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE : COPY_MODE_GAUSSIAN_GLOW; + uint32_t base_flags = 0; + + copy.push_constant.section[2] = p_size.x; + copy.push_constant.section[3] = p_size.y; + + copy.push_constant.glow_strength = p_strength; + copy.push_constant.glow_bloom = p_bloom; + copy.push_constant.glow_hdr_threshold = p_hdr_bleed_treshold; + copy.push_constant.glow_hdr_scale = p_hdr_bleed_scale; + copy.push_constant.glow_exposure = p_exposure; + copy.push_constant.glow_white = 0; //actually unused + copy.push_constant.glow_luminance_cap = p_luminance_cap; + + copy.push_constant.glow_auto_exposure_grey = p_auto_exposure_grey; //unused also + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[copy_mode]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_back_texture), 3); + if (p_auto_exposure.is_valid() && p_first_pass) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_auto_exposure), 1); + } + + copy.push_constant.flags = base_flags | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0) | (p_high_quality ? COPY_FLAG_HIGH_QUALITY_GLOW : 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + { //scale color and depth to half + ssr_scale.push_constant.camera_z_far = p_camera.get_z_far(); + ssr_scale.push_constant.camera_z_near = p_camera.get_z_near(); + ssr_scale.push_constant.orthogonal = p_camera.is_orthogonal(); + ssr_scale.push_constant.filter = false; //enabling causes arctifacts + ssr_scale.push_constant.screen_size[0] = p_screen_size.x; + ssr_scale.push_constant.screen_size[1] = p_screen_size.y; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_scale.pipeline); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_diffuse), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_depth, p_normal_roughness), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output_blur), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_scale_depth, p_scale_normal), 3); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_scale.push_constant, sizeof(ScreenSpaceReflectionScalePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + + { + ssr.push_constant.camera_z_far = p_camera.get_z_far(); + ssr.push_constant.camera_z_near = p_camera.get_z_near(); + ssr.push_constant.orthogonal = p_camera.is_orthogonal(); + ssr.push_constant.screen_size[0] = p_screen_size.x; + ssr.push_constant.screen_size[1] = p_screen_size.y; + ssr.push_constant.curve_fade_in = p_fade_in; + ssr.push_constant.distance_fade = p_fade_out; + ssr.push_constant.num_steps = p_max_steps; + ssr.push_constant.depth_tolerance = p_tolerance; + ssr.push_constant.use_half_res = true; + ssr.push_constant.proj_info[0] = -2.0f / (p_screen_size.width * p_camera.matrix[0][0]); + ssr.push_constant.proj_info[1] = -2.0f / (p_screen_size.height * p_camera.matrix[1][1]); + ssr.push_constant.proj_info[2] = (1.0f - p_camera.matrix[0][2]) / p_camera.matrix[0][0]; + ssr.push_constant.proj_info[3] = (1.0f + p_camera.matrix[1][2]) / p_camera.matrix[1][1]; + ssr.push_constant.metallic_mask[0] = CLAMP(p_metallic_mask.r * 255.0, 0, 255); + ssr.push_constant.metallic_mask[1] = CLAMP(p_metallic_mask.g * 255.0, 0, 255); + ssr.push_constant.metallic_mask[2] = CLAMP(p_metallic_mask.b * 255.0, 0, 255); + ssr.push_constant.metallic_mask[3] = CLAMP(p_metallic_mask.a * 255.0, 0, 255); + store_camera(p_camera, ssr.push_constant.projection); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr.pipelines[(p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) ? SCREEN_SPACE_REFLECTION_ROUGH : SCREEN_SPACE_REFLECTION_NORMAL]); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr.push_constant, sizeof(ScreenSpaceReflectionPushConstant)); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output_blur, p_scale_depth), 0); + + if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output, p_blur_radius), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_metallic, p_normal_roughness), 3); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_metallic), 3); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 2); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + } + + if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) { + //blur + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + ssr_filter.push_constant.orthogonal = p_camera.is_orthogonal(); + ssr_filter.push_constant.edge_tolerance = Math::sin(Math::deg2rad(15.0)); + ssr_filter.push_constant.proj_info[0] = -2.0f / (p_screen_size.width * p_camera.matrix[0][0]); + ssr_filter.push_constant.proj_info[1] = -2.0f / (p_screen_size.height * p_camera.matrix[1][1]); + ssr_filter.push_constant.proj_info[2] = (1.0f - p_camera.matrix[0][2]) / p_camera.matrix[0][0]; + ssr_filter.push_constant.proj_info[3] = (1.0f + p_camera.matrix[1][2]) / p_camera.matrix[1][1]; + ssr_filter.push_constant.vertical = 0; + if (p_roughness_quality == RS::ENV_SSR_ROUGNESS_QUALITY_LOW) { + ssr_filter.push_constant.steps = p_max_steps / 3; + ssr_filter.push_constant.increment = 3; + } else if (p_roughness_quality == RS::ENV_SSR_ROUGNESS_QUALITY_MEDIUM) { + ssr_filter.push_constant.steps = p_max_steps / 2; + ssr_filter.push_constant.increment = 2; + } else { + ssr_filter.push_constant.steps = p_max_steps; + ssr_filter.push_constant.increment = 1; + } + + ssr_filter.push_constant.screen_size[0] = p_screen_size.width; + ssr_filter.push_constant.screen_size[1] = p_screen_size.height; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_filter.pipelines[SCREEN_SPACE_REFLECTION_FILTER_HORIZONTAL]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output, p_blur_radius), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output_blur, p_blur_radius2), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_depth), 3); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_filter.pipelines[SCREEN_SPACE_REFLECTION_FILTER_VERTICAL]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output_blur, p_blur_radius2), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_depth), 3); + + ssr_filter.push_constant.vertical = 1; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + } + + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RenderingServer::SubSurfaceScatteringQuality p_quality) { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + Plane p = p_camera.xform4(Plane(1, 0, -1, 1)); + p.normal /= p.d; + float unit_size = p.normal.x; + + { //scale color and depth to half + sss.push_constant.camera_z_far = p_camera.get_z_far(); + sss.push_constant.camera_z_near = p_camera.get_z_near(); + sss.push_constant.orthogonal = p_camera.is_orthogonal(); + sss.push_constant.unit_size = unit_size; + sss.push_constant.screen_size[0] = p_screen_size.x; + sss.push_constant.screen_size[1] = p_screen_size.y; + sss.push_constant.vertical = false; + sss.push_constant.scale = p_scale; + sss.push_constant.depth_scale = p_depth_scale; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sss.pipelines[p_quality - 1]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_diffuse), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_diffuse2), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth), 2); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_diffuse2), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_diffuse), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth), 2); + + sss.push_constant.vertical = true; + RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + + RD::get_singleton()->compute_list_end(); + } +} + +void EffectsRD::merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection) { + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector<Color>()); + + if (p_reflection.is_valid()) { + if (p_base.is_valid()) { + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_SSR].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_base), 2); + } else { + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_ADDITIVE_SSR].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); + } + + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_specular), 0); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_reflection), 1); + + } else { + if (p_base.is_valid()) { + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_ADD].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_base), 2); + } else { + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_ADDITIVE_ADD].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); + } + + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_specular), 0); + } + + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); + RD::get_singleton()->draw_list_draw(draw_list, true); + RD::get_singleton()->draw_list_end(); +} + +void EffectsRD::make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const Size2i &p_size) { + zeromem(©.push_constant, sizeof(CopyPushConstant)); + + copy.push_constant.section[0] = 0; + copy.push_constant.section[1] = 0; + copy.push_constant.section[2] = p_size.width; + copy.push_constant.section[3] = p_size.height; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_MIPMAP]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, float p_z_near, float p_z_far, bool p_dp_flip) { + CopyToDPPushConstant push_constant; + push_constant.screen_rect[0] = p_rect.position.x; + push_constant.screen_rect[1] = p_rect.position.y; + push_constant.screen_rect[2] = p_rect.size.width; + push_constant.screen_rect[3] = p_rect.size.height; + push_constant.z_far = p_z_far; + push_constant.z_near = p_z_near; + push_constant.z_flip = p_dp_flip; + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, cube_to_dp.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dst_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant)); + RD::get_singleton()->draw_list_draw(draw_list, true); + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER); +} + +void EffectsRD::tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings) { + zeromem(&tonemap.push_constant, sizeof(TonemapPushConstant)); + + tonemap.push_constant.use_bcs = p_settings.use_bcs; + tonemap.push_constant.bcs[0] = p_settings.brightness; + tonemap.push_constant.bcs[1] = p_settings.contrast; + tonemap.push_constant.bcs[2] = p_settings.saturation; + + tonemap.push_constant.use_glow = p_settings.use_glow; + tonemap.push_constant.glow_intensity = p_settings.glow_intensity; + tonemap.push_constant.glow_levels[0] = p_settings.glow_levels[0]; // clean this up to just pass by pointer or something + tonemap.push_constant.glow_levels[1] = p_settings.glow_levels[1]; + tonemap.push_constant.glow_levels[2] = p_settings.glow_levels[2]; + tonemap.push_constant.glow_levels[3] = p_settings.glow_levels[3]; + tonemap.push_constant.glow_levels[4] = p_settings.glow_levels[4]; + tonemap.push_constant.glow_levels[5] = p_settings.glow_levels[5]; + tonemap.push_constant.glow_levels[6] = p_settings.glow_levels[6]; + tonemap.push_constant.glow_texture_size[0] = p_settings.glow_texture_size.x; + tonemap.push_constant.glow_texture_size[1] = p_settings.glow_texture_size.y; + tonemap.push_constant.glow_mode = p_settings.glow_mode; + + int mode = p_settings.glow_use_bicubic_upscale ? TONEMAP_MODE_BICUBIC_GLOW_FILTER : TONEMAP_MODE_NORMAL; + if (p_settings.use_1d_color_correction) { + mode += 2; + } + + tonemap.push_constant.tonemapper = p_settings.tonemap_mode; + tonemap.push_constant.use_auto_exposure = p_settings.use_auto_exposure; + tonemap.push_constant.exposure = p_settings.exposure; + tonemap.push_constant.white = p_settings.white; + tonemap.push_constant.auto_exposure_grey = p_settings.auto_exposure_grey; + + tonemap.push_constant.use_color_correction = p_settings.use_color_correction; + + tonemap.push_constant.use_fxaa = p_settings.use_fxaa; + tonemap.push_constant.use_debanding = p_settings.use_debanding; + tonemap.push_constant.pixel_size[0] = 1.0 / p_settings.texture_size.x; + tonemap.push_constant.pixel_size[1] = 1.0 / p_settings.texture_size.y; + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD); + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, tonemap.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dst_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_source_color), 0); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_settings.exposure_texture), 1); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_settings.glow_texture, true), 2); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_settings.color_correction_texture), 3); + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &tonemap.push_constant, sizeof(TonemapPushConstant)); + RD::get_singleton()->draw_list_draw(draw_list, true); + RD::get_singleton()->draw_list_end(); +} + +void EffectsRD::luminance_reduction(RID p_source_texture, const Size2i p_source_size, const Vector<RID> p_reduce, RID p_prev_luminance, float p_min_luminance, float p_max_luminance, float p_adjust, bool p_set) { + luminance_reduce.push_constant.source_size[0] = p_source_size.x; + luminance_reduce.push_constant.source_size[1] = p_source_size.y; + luminance_reduce.push_constant.max_luminance = p_max_luminance; + luminance_reduce.push_constant.min_luminance = p_min_luminance; + luminance_reduce.push_constant.exposure_adjust = p_adjust; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + for (int i = 0; i < p_reduce.size(); i++) { + if (i == 0) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, luminance_reduce.pipelines[LUMINANCE_REDUCE_READ]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_texture), 0); + } else { + RD::get_singleton()->compute_list_add_barrier(compute_list); //needs barrier, wait until previous is done + + if (i == p_reduce.size() - 1 && !p_set) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, luminance_reduce.pipelines[LUMINANCE_REDUCE_WRITE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_prev_luminance), 2); + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, luminance_reduce.pipelines[LUMINANCE_REDUCE]); + } + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_reduce[i - 1]), 0); + } + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_reduce[i]), 1); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &luminance_reduce.push_constant, sizeof(LuminanceReducePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, luminance_reduce.push_constant.source_size[0], luminance_reduce.push_constant.source_size[1], 1); + + luminance_reduce.push_constant.source_size[0] = MAX(luminance_reduce.push_constant.source_size[0] / 8, 1); + luminance_reduce.push_constant.source_size[1] = MAX(luminance_reduce.push_constant.source_size[1] / 8, 1); + } + + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i &p_base_texture_size, RID p_secondary_texture, RID p_halfsize_texture1, RID p_halfsize_texture2, bool p_dof_far, float p_dof_far_begin, float p_dof_far_size, bool p_dof_near, float p_dof_near_begin, float p_dof_near_size, float p_bokeh_size, RenderingServer::DOFBokehShape p_bokeh_shape, RS::DOFBlurQuality p_quality, bool p_use_jitter, float p_cam_znear, float p_cam_zfar, bool p_cam_orthogonal) { + bokeh.push_constant.blur_far_active = p_dof_far; + bokeh.push_constant.blur_far_begin = p_dof_far_begin; + bokeh.push_constant.blur_far_end = p_dof_far_begin + p_dof_far_size; + + bokeh.push_constant.blur_near_active = p_dof_near; + bokeh.push_constant.blur_near_begin = p_dof_near_begin; + bokeh.push_constant.blur_near_end = MAX(0, p_dof_near_begin - p_dof_near_size); + bokeh.push_constant.use_jitter = p_use_jitter; + bokeh.push_constant.jitter_seed = Math::randf() * 1000.0; + + bokeh.push_constant.z_near = p_cam_znear; + bokeh.push_constant.z_far = p_cam_zfar; + bokeh.push_constant.orthogonal = p_cam_orthogonal; + bokeh.push_constant.blur_size = p_bokeh_size; + + bokeh.push_constant.second_pass = false; + bokeh.push_constant.half_size = false; + + bokeh.push_constant.blur_scale = 0.5; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + /* FIRST PASS */ + // The alpha channel of the source color texture is filled with the expected circle size + // If used for DOF far, the size is positive, if used for near, its negative. + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, bokeh.pipelines[BOKEH_GEN_BLUR_SIZE]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_texture), 1); + + bokeh.push_constant.size[0] = p_base_texture_size.x; + bokeh.push_constant.size[1] = p_base_texture_size.y; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + if (p_bokeh_shape == RS::DOF_BOKEH_BOX || p_bokeh_shape == RS::DOF_BOKEH_HEXAGON) { + //second pass + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, bokeh.pipelines[p_bokeh_shape == RS::DOF_BOKEH_BOX ? BOKEH_GEN_BOKEH_BOX : BOKEH_GEN_BOKEH_HEXAGONAL]); + + static const int quality_samples[4] = { 6, 12, 12, 24 }; + + bokeh.push_constant.steps = quality_samples[p_quality]; + + if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || p_quality == RS::DOF_BLUR_QUALITY_LOW) { + //box and hexagon are more or less the same, and they can work in either half (very low and low quality) or full (medium and high quality_ sizes) + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); + + bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; + bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; + bokeh.push_constant.half_size = true; + bokeh.push_constant.blur_size *= 0.5; + + } else { + //medium and high quality use full size + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_secondary_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //third pass + bokeh.push_constant.second_pass = true; + + if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || p_quality == RS::DOF_BLUR_QUALITY_LOW) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture2), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture1), 1); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_secondary_texture), 1); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || p_quality == RS::DOF_BLUR_QUALITY_LOW) { + //forth pass, upscale for low quality + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, bokeh.pipelines[BOKEH_COMPOSITE]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture2), 1); + + bokeh.push_constant.size[0] = p_base_texture_size.x; + bokeh.push_constant.size[1] = p_base_texture_size.y; + bokeh.push_constant.half_size = false; + bokeh.push_constant.second_pass = false; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); + } + } else { + //circle + + //second pass + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, bokeh.pipelines[BOKEH_GEN_BOKEH_CIRCULAR]); + + static const float quality_scale[4] = { 8.0, 4.0, 1.0, 0.5 }; + + bokeh.push_constant.steps = 0; + bokeh.push_constant.blur_scale = quality_scale[p_quality]; + + //circle always runs in half size, otherwise too expensive + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); + + bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; + bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; + bokeh.push_constant.half_size = true; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //circle is just one pass, then upscale + + // upscale + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, bokeh.pipelines[BOKEH_COMPOSITE]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture1), 1); + + bokeh.push_constant.size[0] = p_base_texture_size.x; + bokeh.push_constant.size[1] = p_base_texture_size.y; + bokeh.push_constant.half_size = false; + bokeh.push_constant.second_pass = false; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); + } + + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID> p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, ssao.gather_uniform_set, 0); + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) && !p_adaptive_base_pass) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, ssao.importance_map_uniform_set, 1); + } + + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + ssao.gather_push_constant.pass_coord_offset[0] = i % 2; + ssao.gather_push_constant.pass_coord_offset[1] = i / 2; + ssao.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.full_screen_size.x; + ssao.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.full_screen_size.y; + ssao.gather_push_constant.pass = i; + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2); + RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); + + Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); + + RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); + } + RD::get_singleton()->compute_list_add_barrier(p_compute_list); +} + +void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_depth_mipmaps_texture, const Vector<RID> &p_depth_mipmaps, RID p_ao, const Vector<RID> p_ao_slices, RID p_ao_pong, const Vector<RID> p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets) { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->draw_command_begin_label("SSAO"); + /* FIRST PASS */ + // Downsample and deinterleave the depth buffer. + { + RD::get_singleton()->draw_command_begin_label("Downsample Depth"); + if (p_invalidate_uniform_sets) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.ids.push_back(p_depth_mipmaps[1]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(p_depth_mipmaps[2]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(p_depth_mipmaps[3]); + uniforms.push_back(u); + } + ssao.downsample_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.downsample_shader.version_get_shader(ssao.downsample_shader_version, 2), 2); + } + + float depth_linearize_mul = -p_projection.matrix[3][2]; + float depth_linearize_add = p_projection.matrix[2][2]; + if (depth_linearize_mul * depth_linearize_add < 0) { + depth_linearize_add = -depth_linearize_add; + } + + ssao.downsample_push_constant.orthogonal = p_projection.is_orthogonal(); + ssao.downsample_push_constant.z_near = depth_linearize_mul; + ssao.downsample_push_constant.z_far = depth_linearize_add; + if (ssao.downsample_push_constant.orthogonal) { + ssao.downsample_push_constant.z_near = p_projection.get_z_near(); + ssao.downsample_push_constant.z_far = p_projection.get_z_far(); + } + ssao.downsample_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; + ssao.downsample_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; + ssao.downsample_push_constant.radius_sq = p_settings.radius * p_settings.radius; + + int downsample_pipeline = SSAO_DOWNSAMPLE; + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + downsample_pipeline = SSAO_DOWNSAMPLE_HALF; + } else if (p_settings.quality > RS::ENV_SSAO_QUALITY_MEDIUM) { + downsample_pipeline = SSAO_DOWNSAMPLE_MIPMAP; + } + + if (p_settings.half_size) { + downsample_pipeline++; + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[downsample_pipeline]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_depth_mipmaps[0]), 1); + if (p_settings.quality > RS::ENV_SSAO_QUALITY_MEDIUM) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.downsample_uniform_set, 2); + } + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample_push_constant, sizeof(SSAODownsamplePushConstant)); + + Size2i size(MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)), MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1))); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); // Downsample SSAO + } + + /* SECOND PASS */ + // Sample SSAO + { + RD::get_singleton()->draw_command_begin_label("Gather Samples"); + ssao.gather_push_constant.screen_size[0] = p_settings.full_screen_size.x; + ssao.gather_push_constant.screen_size[1] = p_settings.full_screen_size.y; + + ssao.gather_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; + ssao.gather_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; + float tan_half_fov_x = 1.0 / p_projection.matrix[0][0]; + float tan_half_fov_y = 1.0 / p_projection.matrix[1][1]; + ssao.gather_push_constant.NDC_to_view_mul[0] = tan_half_fov_x * 2.0; + ssao.gather_push_constant.NDC_to_view_mul[1] = tan_half_fov_y * -2.0; + ssao.gather_push_constant.NDC_to_view_add[0] = tan_half_fov_x * -1.0; + ssao.gather_push_constant.NDC_to_view_add[1] = tan_half_fov_y; + ssao.gather_push_constant.is_orthogonal = p_projection.is_orthogonal(); + + ssao.gather_push_constant.half_screen_pixel_size_x025[0] = ssao.gather_push_constant.half_screen_pixel_size[0] * 0.25; + ssao.gather_push_constant.half_screen_pixel_size_x025[1] = ssao.gather_push_constant.half_screen_pixel_size[1] * 0.25; + + float radius_near_limit = (p_settings.radius * 1.2f); + if (p_settings.quality <= RS::ENV_SSAO_QUALITY_LOW) { + radius_near_limit *= 1.50f; + + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + ssao.gather_push_constant.radius *= 0.8f; + } + if (p_settings.half_size) { + ssao.gather_push_constant.radius *= 0.5f; + } + } + radius_near_limit /= tan_half_fov_y; + ssao.gather_push_constant.radius = p_settings.radius; + ssao.gather_push_constant.intensity = p_settings.intensity; + ssao.gather_push_constant.shadow_power = p_settings.power; + ssao.gather_push_constant.shadow_clamp = 0.98; + ssao.gather_push_constant.fade_out_mul = -1.0 / (p_settings.fadeout_to - p_settings.fadeout_from); + ssao.gather_push_constant.fade_out_add = p_settings.fadeout_from / (p_settings.fadeout_to - p_settings.fadeout_from) + 1.0; + ssao.gather_push_constant.horizon_angle_threshold = p_settings.horizon; + ssao.gather_push_constant.inv_radius_near_limit = 1.0f / radius_near_limit; + ssao.gather_push_constant.neg_inv_radius = -1.0 / ssao.gather_push_constant.radius; + + ssao.gather_push_constant.load_counter_avg_div = 9.0 / float((p_settings.quarter_screen_size.x) * (p_settings.quarter_screen_size.y) * 255); + ssao.gather_push_constant.adaptive_sample_limit = p_settings.adaptive_target; + + ssao.gather_push_constant.detail_intensity = p_settings.detail; + ssao.gather_push_constant.quality = MAX(0, p_settings.quality - 1); + ssao.gather_push_constant.size_multiplier = p_settings.half_size ? 2 : 1; + + if (p_invalidate_uniform_sets) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(ssao.mirror_sampler); + u.ids.push_back(p_depth_mipmaps_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(p_normal_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 2; + u.ids.push_back(ssao.gather_constants_buffer); + uniforms.push_back(u); + } + ssao.gather_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 0), 0); + } + + if (p_invalidate_uniform_sets) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.ids.push_back(p_ao_pong); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 1; + u.ids.push_back(default_sampler); + u.ids.push_back(p_importance_map); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(ssao.importance_map_load_counter); + uniforms.push_back(u); + } + ssao.importance_map_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 2), 1); + } + + if (p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) { + RD::get_singleton()->draw_command_begin_label("Generate Importance Map"); + ssao.importance_map_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; + ssao.importance_map_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; + ssao.importance_map_push_constant.intensity = p_settings.intensity; + ssao.importance_map_push_constant.power = p_settings.power; + //base pass + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]); + gather_ssao(compute_list, p_ao_pong_slices, p_settings, true); + //generate importance map + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + //process importance map A + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + //process Importance Map B + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map_pong), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); + RD::get_singleton()->draw_command_end_label(); // Importance Map + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER]); + } + + gather_ssao(compute_list, p_ao_slices, p_settings, false); + RD::get_singleton()->draw_command_end_label(); // Gather SSAO + } + + // /* THIRD PASS */ + // // Blur + // + { + RD::get_singleton()->draw_command_begin_label("Edge Aware Blur"); + ssao.blur_push_constant.edge_sharpness = 1.0 - p_settings.sharpness; + ssao.blur_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; + ssao.blur_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; + + int blur_passes = p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW ? p_settings.blur_passes : 1; + + for (int pass = 0; pass < blur_passes; pass++) { + int blur_pipeline = SSAO_BLUR_PASS; + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { + if (pass < blur_passes - 2) { + blur_pipeline = SSAO_BLUR_PASS_WIDE; + } + blur_pipeline = SSAO_BLUR_PASS_SMART; + } + + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[blur_pipeline]); + if (pass % 2 == 0) { + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_slices[i]), 0); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ao_slices[i], ssao.mirror_sampler), 0); + } + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao_pong_slices[i]), 1); + } else { + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong_slices[i]), 0); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ao_pong_slices[i], ssao.mirror_sampler), 0); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 1); + } + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); + + Size2i size(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); + } + + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + } + RD::get_singleton()->draw_command_end_label(); // Blur + } + + /* FOURTH PASS */ + // Interleave buffers + // back to full size + { + RD::get_singleton()->draw_command_begin_label("Interleave Buffers"); + ssao.interleave_push_constant.inv_sharpness = 1.0 - p_settings.sharpness; + ssao.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; + ssao.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; + ssao.interleave_push_constant.size_modifier = uint32_t(p_settings.half_size ? 4 : 2); + + int interleave_pipeline = SSAO_INTERLEAVE_HALF; + if (p_settings.quality == RS::ENV_SSAO_QUALITY_LOW) { + interleave_pipeline = SSAO_INTERLEAVE; + } else if (p_settings.quality >= RS::ENV_SSAO_QUALITY_MEDIUM) { + interleave_pipeline = SSAO_INTERLEAVE_SMART; + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[interleave_pipeline]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_upscale_buffer), 0); + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW && p_settings.blur_passes % 2 == 0) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao), 1); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 1); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); // Interleave + } + RD::get_singleton()->draw_command_end_label(); //SSAO + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER); //wait for upcoming transfer + + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier +} + +void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { + roughness_limiter.push_constant.screen_size[0] = p_size.x; + roughness_limiter.push_constant.screen_size[1] = p_size.y; + roughness_limiter.push_constant.curve = p_curve; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, roughness_limiter.pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_normal), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_roughness), 1); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &roughness_limiter.push_constant, sizeof(RoughnessLimiterPushConstant)); //not used but set anyway + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.x, p_size.y, 1); + + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::cubemap_roughness(RID p_source_rd_texture, RID p_dest_framebuffer, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size) { + zeromem(&roughness.push_constant, sizeof(CubemapRoughnessPushConstant)); + + roughness.push_constant.face_id = p_face_id > 9 ? 0 : p_face_id; + roughness.push_constant.roughness = p_roughness; + roughness.push_constant.sample_count = p_sample_count; + roughness.push_constant.use_direct_write = p_roughness == 0.0; + roughness.push_constant.face_size = p_size; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, roughness.pipeline); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_framebuffer), 1); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &roughness.push_constant, sizeof(CubemapRoughnessPushConstant)); + + int x_groups = (p_size - 1) / 8 + 1; + int y_groups = (p_size - 1) / 8 + 1; + + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, p_face_id > 9 ? 6 : 1); + + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size) { + cubemap_downsampler.push_constant.face_size = p_size.x; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, cubemap_downsampler.pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_cubemap), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_cubemap), 1); + + int x_groups = (p_size.x - 1) / 8 + 1; + int y_groups = (p_size.y - 1) / 8 + 1; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &cubemap_downsampler.push_constant, sizeof(CubemapDownsamplerPushConstant)); + + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 6); // one z_group for each face + + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::cubemap_filter(RID p_source_cubemap, Vector<RID> p_dest_cubemap, bool p_use_array) { + Vector<RD::Uniform> uniforms; + for (int i = 0; i < p_dest_cubemap.size(); i++) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = i; + u.ids.push_back(p_dest_cubemap[i]); + uniforms.push_back(u); + } + if (RD::get_singleton()->uniform_set_is_valid(filter.image_uniform_set)) { + RD::get_singleton()->free(filter.image_uniform_set); + } + filter.image_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, filter.shader.version_get_shader(filter.shader_version, 0), 2); + + int pipeline = p_use_array ? FILTER_MODE_HIGH_QUALITY_ARRAY : FILTER_MODE_HIGH_QUALITY; + pipeline = filter.use_high_quality ? pipeline : pipeline + 1; + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, filter.pipelines[pipeline]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_cubemap, true), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, filter.uniform_set, 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, filter.image_uniform_set, 2); + + int x_groups = p_use_array ? 1792 : 342; // (128 * 128 * 7) / 64 : (128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) / 64 + + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, 6, 1); // one y_group for each face + + RD::get_singleton()->compute_list_end(); +} + +void EffectsRD::render_sky(RD::DrawListID p_list, float p_time, RID p_fb, RID p_samplers, RID p_fog, PipelineCacheRD *p_pipeline, RID p_uniform_set, RID p_texture_set, const CameraMatrix &p_camera, const Basis &p_orientation, float p_multiplier, const Vector3 &p_position) { + SkyPushConstant sky_push_constant; + + zeromem(&sky_push_constant, sizeof(SkyPushConstant)); + + sky_push_constant.proj[0] = p_camera.matrix[2][0]; + sky_push_constant.proj[1] = p_camera.matrix[0][0]; + sky_push_constant.proj[2] = p_camera.matrix[2][1]; + sky_push_constant.proj[3] = p_camera.matrix[1][1]; + sky_push_constant.position[0] = p_position.x; + sky_push_constant.position[1] = p_position.y; + sky_push_constant.position[2] = p_position.z; + sky_push_constant.multiplier = p_multiplier; + sky_push_constant.time = p_time; + store_transform_3x3(p_orientation, sky_push_constant.orientation); + + RenderingDevice::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(p_fb); + + RD::DrawListID draw_list = p_list; + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, p_pipeline->get_render_pipeline(RD::INVALID_ID, fb_format)); + + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_samplers, 0); + if (p_uniform_set.is_valid()) { //material may not have uniform set + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_uniform_set, 1); + } + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_texture_set, 2); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_fog, 3); + + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &sky_push_constant, sizeof(SkyPushConstant)); + + RD::get_singleton()->draw_list_draw(draw_list, true); +} + +void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { + ResolvePushConstant push_constant; + push_constant.screen_size[0] = p_screen_size.x; + push_constant.screen_size[1] = p_screen_size.y; + push_constant.samples = p_samples; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, resolve.pipelines[p_source_giprobe.is_valid() ? RESOLVE_MODE_GI_GIPROBE : RESOLVE_MODE_GI]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_source_depth, p_source_normal_roughness), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_dest_depth, p_dest_normal_roughness), 1); + if (p_source_giprobe.is_valid()) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_giprobe), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_giprobe), 3); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); + + RD::get_singleton()->compute_list_end(p_barrier); +} + +void EffectsRD::sort_buffer(RID p_uniform_set, int p_size) { + Sort::PushConstant push_constant; + push_constant.total_elements = p_size; + + bool done = true; + + int numThreadGroups = ((p_size - 1) >> 9) + 1; + + if (numThreadGroups > 1) { + done = false; + } + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sort.pipelines[SORT_MODE_BLOCK]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, p_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(Sort::PushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, numThreadGroups, 1, 1); + + int presorted = 512; + + while (!done) { + RD::get_singleton()->compute_list_add_barrier(compute_list); + + done = true; + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sort.pipelines[SORT_MODE_STEP]); + + numThreadGroups = 0; + + if (p_size > presorted) { + if (p_size > presorted * 2) { + done = false; + } + + int pow2 = presorted; + while (pow2 < p_size) { + pow2 *= 2; + } + numThreadGroups = pow2 >> 9; + } + + unsigned int nMergeSize = presorted * 2; + + for (unsigned int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 256; nMergeSubSize = nMergeSubSize >> 1) { + push_constant.job_params[0] = nMergeSubSize; + if (nMergeSubSize == nMergeSize >> 1) { + push_constant.job_params[1] = (2 * nMergeSubSize - 1); + push_constant.job_params[2] = -1; + } else { + push_constant.job_params[1] = nMergeSubSize; + push_constant.job_params[2] = 1; + } + push_constant.job_params[3] = 0; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(Sort::PushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, numThreadGroups, 1, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sort.pipelines[SORT_MODE_INNER]); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(Sort::PushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, numThreadGroups, 1, 1); + + presorted *= 2; + } + + RD::get_singleton()->compute_list_end(); +} + +EffectsRD::EffectsRD() { + { // Initialize copy + Vector<String> copy_modes; + copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n"); + copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define DST_IMAGE_8BIT\n"); + copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n"); + copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n#define GLOW_USE_AUTO_EXPOSURE\n"); + copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n"); + copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n#define DST_IMAGE_8BIT\n"); + copy_modes.push_back("\n#define MODE_SIMPLE_COPY_DEPTH\n"); + copy_modes.push_back("\n#define MODE_SET_COLOR\n"); + copy_modes.push_back("\n#define MODE_SET_COLOR\n#define DST_IMAGE_8BIT\n"); + copy_modes.push_back("\n#define MODE_MIPMAP\n"); + copy_modes.push_back("\n#define MODE_LINEARIZE_DEPTH_COPY\n"); + copy_modes.push_back("\n#define MODE_CUBEMAP_TO_PANORAMA\n"); + copy_modes.push_back("\n#define MODE_CUBEMAP_ARRAY_TO_PANORAMA\n"); + + copy.shader.initialize(copy_modes); + zeromem(©.push_constant, sizeof(CopyPushConstant)); + copy.shader_version = copy.shader.version_create(); + + for (int i = 0; i < COPY_MODE_MAX; i++) { + copy.pipelines[i] = RD::get_singleton()->compute_pipeline_create(copy.shader.version_get_shader(copy.shader_version, i)); + } + } + { + Vector<String> copy_modes; + copy_modes.push_back("\n"); + copy_modes.push_back("\n#define MODE_PANORAMA_TO_DP\n"); + copy_modes.push_back("\n#define MODE_TWO_SOURCES\n"); + + copy_to_fb.shader.initialize(copy_modes); + + copy_to_fb.shader_version = copy_to_fb.shader.version_create(); + + //use additive + + for (int i = 0; i < COPY_TO_FB_MAX; i++) { + copy_to_fb.pipelines[i].setup(copy_to_fb.shader.version_get_shader(copy_to_fb.shader_version, i), RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState::create_disabled(), 0); + } + } + + { + // Initialize roughness + Vector<String> cubemap_roughness_modes; + cubemap_roughness_modes.push_back(""); + roughness.shader.initialize(cubemap_roughness_modes); + + roughness.shader_version = roughness.shader.version_create(); + + roughness.pipeline = RD::get_singleton()->compute_pipeline_create(roughness.shader.version_get_shader(roughness.shader_version, 0)); + } + + { + // Initialize tonemapper + Vector<String> tonemap_modes; + tonemap_modes.push_back("\n"); + tonemap_modes.push_back("\n#define USE_GLOW_FILTER_BICUBIC\n"); + tonemap_modes.push_back("\n#define USE_1D_LUT\n"); + tonemap_modes.push_back("\n#define USE_GLOW_FILTER_BICUBIC\n#define USE_1D_LUT\n"); + + tonemap.shader.initialize(tonemap_modes); + + tonemap.shader_version = tonemap.shader.version_create(); + + for (int i = 0; i < TONEMAP_MODE_MAX; i++) { + tonemap.pipelines[i].setup(tonemap.shader.version_get_shader(tonemap.shader_version, i), RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState::create_disabled(), 0); + } + } + + { + // Initialize luminance_reduce + Vector<String> luminance_reduce_modes; + luminance_reduce_modes.push_back("\n#define READ_TEXTURE\n"); + luminance_reduce_modes.push_back("\n"); + luminance_reduce_modes.push_back("\n#define WRITE_LUMINANCE\n"); + + luminance_reduce.shader.initialize(luminance_reduce_modes); + + luminance_reduce.shader_version = luminance_reduce.shader.version_create(); + + for (int i = 0; i < LUMINANCE_REDUCE_MAX; i++) { + luminance_reduce.pipelines[i] = RD::get_singleton()->compute_pipeline_create(luminance_reduce.shader.version_get_shader(luminance_reduce.shader_version, i)); + } + } + + { + // Initialize copier + Vector<String> copy_modes; + copy_modes.push_back("\n"); + + cube_to_dp.shader.initialize(copy_modes); + + cube_to_dp.shader_version = cube_to_dp.shader.version_create(); + RID shader = cube_to_dp.shader.version_get_shader(cube_to_dp.shader_version, 0); + RD::PipelineDepthStencilState dss; + dss.enable_depth_test = true; + dss.depth_compare_operator = RD::COMPARE_OP_ALWAYS; + dss.enable_depth_write = true; + cube_to_dp.pipeline.setup(shader, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), dss, RD::PipelineColorBlendState(), 0); + } + + { + // Initialize bokeh + Vector<String> bokeh_modes; + bokeh_modes.push_back("\n#define MODE_GEN_BLUR_SIZE\n"); + bokeh_modes.push_back("\n#define MODE_BOKEH_BOX\n"); + bokeh_modes.push_back("\n#define MODE_BOKEH_HEXAGONAL\n"); + bokeh_modes.push_back("\n#define MODE_BOKEH_CIRCULAR\n"); + bokeh_modes.push_back("\n#define MODE_COMPOSITE_BOKEH\n"); + + bokeh.shader.initialize(bokeh_modes); + + bokeh.shader_version = bokeh.shader.version_create(); + + for (int i = 0; i < BOKEH_MAX; i++) { + bokeh.pipelines[i] = RD::get_singleton()->compute_pipeline_create(bokeh.shader.version_get_shader(bokeh.shader_version, i)); + } + } + + { + // Initialize ssao + + RD::SamplerState sampler; + sampler.mag_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.min_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.mip_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.repeat_u = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.repeat_v = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.repeat_w = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.max_lod = 4; + + ssao.mirror_sampler = RD::get_singleton()->sampler_create(sampler); + + uint32_t pipeline = 0; + { + Vector<String> ssao_modes; + ssao_modes.push_back("\n"); + ssao_modes.push_back("\n#define USE_HALF_SIZE\n"); + ssao_modes.push_back("\n#define GENERATE_MIPS\n"); + ssao_modes.push_back("\n#define GENERATE_MIPS\n#define USE_HALF_SIZE"); + ssao_modes.push_back("\n#define USE_HALF_BUFFERS\n"); + ssao_modes.push_back("\n#define USE_HALF_BUFFERS\n#define USE_HALF_SIZE"); + + ssao.downsample_shader.initialize(ssao_modes); + + ssao.downsample_shader_version = ssao.downsample_shader.version_create(); + + for (int i = 0; i <= SSAO_DOWNSAMPLE_HALF_RES_HALF; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.downsample_shader.version_get_shader(ssao.downsample_shader_version, i)); + pipeline++; + } + } + { + Vector<String> ssao_modes; + + ssao_modes.push_back("\n"); + ssao_modes.push_back("\n#define SSAO_BASE\n"); + ssao_modes.push_back("\n#define ADAPTIVE\n"); + + ssao.gather_shader.initialize(ssao_modes); + + ssao.gather_shader_version = ssao.gather_shader.version_create(); + + for (int i = SSAO_GATHER; i <= SSAO_GATHER_ADAPTIVE; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.gather_shader.version_get_shader(ssao.gather_shader_version, i - SSAO_GATHER)); + pipeline++; + } + + ssao.gather_constants_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SSAOGatherConstants)); + SSAOGatherConstants gather_constants; + + const int sub_pass_count = 5; + for (int pass = 0; pass < 4; pass++) { + for (int subPass = 0; subPass < sub_pass_count; subPass++) { + int a = pass; + int b = subPass; + + int spmap[5]{ 0, 1, 4, 3, 2 }; + b = spmap[subPass]; + + float ca, sa; + float angle0 = (float(a) + float(b) / float(sub_pass_count)) * Math_PI * 0.5f; + + ca = Math::cos(angle0); + sa = Math::sin(angle0); + + float scale = 1.0f + (a - 1.5f + (b - (sub_pass_count - 1.0f) * 0.5f) / float(sub_pass_count)) * 0.07f; + + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 0] = scale * ca; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 1] = scale * -sa; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 2] = -scale * sa; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 3] = -scale * ca; + } + } + + RD::get_singleton()->buffer_update(ssao.gather_constants_buffer, 0, sizeof(SSAOGatherConstants), &gather_constants); + } + { + Vector<String> ssao_modes; + ssao_modes.push_back("\n#define GENERATE_MAP\n"); + ssao_modes.push_back("\n#define PROCESS_MAPA\n"); + ssao_modes.push_back("\n#define PROCESS_MAPB\n"); + + ssao.importance_map_shader.initialize(ssao_modes); + + ssao.importance_map_shader_version = ssao.importance_map_shader.version_create(); + + for (int i = SSAO_GENERATE_IMPORTANCE_MAP; i <= SSAO_PROCESS_IMPORTANCE_MAPB; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, i - SSAO_GENERATE_IMPORTANCE_MAP)); + + pipeline++; + } + ssao.importance_map_load_counter = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); + RD::get_singleton()->set_resource_name(ssao.importance_map_load_counter, "Importance Map Load Counter"); + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(ssao.importance_map_load_counter); + uniforms.push_back(u); + } + ssao.counter_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, 2), 2); + RD::get_singleton()->set_resource_name(ssao.counter_uniform_set, "Load Counter Uniform Set"); + } + { + Vector<String> ssao_modes; + ssao_modes.push_back("\n#define MODE_NON_SMART\n"); + ssao_modes.push_back("\n#define MODE_SMART\n"); + ssao_modes.push_back("\n#define MODE_WIDE\n"); + + ssao.blur_shader.initialize(ssao_modes); + + ssao.blur_shader_version = ssao.blur_shader.version_create(); + + for (int i = SSAO_BLUR_PASS; i <= SSAO_BLUR_PASS_WIDE; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.blur_shader.version_get_shader(ssao.blur_shader_version, i - SSAO_BLUR_PASS)); + + pipeline++; + } + } + { + Vector<String> ssao_modes; + ssao_modes.push_back("\n#define MODE_NON_SMART\n"); + ssao_modes.push_back("\n#define MODE_SMART\n"); + ssao_modes.push_back("\n#define MODE_HALF\n"); + + ssao.interleave_shader.initialize(ssao_modes); + + ssao.interleave_shader_version = ssao.interleave_shader.version_create(); + for (int i = SSAO_INTERLEAVE; i <= SSAO_INTERLEAVE_HALF; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.interleave_shader.version_get_shader(ssao.interleave_shader_version, i - SSAO_INTERLEAVE)); + RD::get_singleton()->set_resource_name(ssao.pipelines[pipeline], "Interleave Pipeline " + itos(i)); + pipeline++; + } + } + + ERR_FAIL_COND(pipeline != SSAO_MAX); + } + + { + // Initialize roughness limiter + Vector<String> shader_modes; + shader_modes.push_back(""); + + roughness_limiter.shader.initialize(shader_modes); + + roughness_limiter.shader_version = roughness_limiter.shader.version_create(); + + roughness_limiter.pipeline = RD::get_singleton()->compute_pipeline_create(roughness_limiter.shader.version_get_shader(roughness_limiter.shader_version, 0)); + } + + { + //Initialize cubemap downsampler + Vector<String> cubemap_downsampler_modes; + cubemap_downsampler_modes.push_back(""); + cubemap_downsampler.shader.initialize(cubemap_downsampler_modes); + + cubemap_downsampler.shader_version = cubemap_downsampler.shader.version_create(); + + cubemap_downsampler.pipeline = RD::get_singleton()->compute_pipeline_create(cubemap_downsampler.shader.version_get_shader(cubemap_downsampler.shader_version, 0)); + } + + { + // Initialize cubemap filter + filter.use_high_quality = GLOBAL_GET("rendering/reflections/sky_reflections/fast_filter_high_quality"); + + Vector<String> cubemap_filter_modes; + cubemap_filter_modes.push_back("\n#define USE_HIGH_QUALITY\n"); + cubemap_filter_modes.push_back("\n#define USE_LOW_QUALITY\n"); + cubemap_filter_modes.push_back("\n#define USE_HIGH_QUALITY\n#define USE_TEXTURE_ARRAY\n"); + cubemap_filter_modes.push_back("\n#define USE_LOW_QUALITY\n#define USE_TEXTURE_ARRAY\n"); + filter.shader.initialize(cubemap_filter_modes); + filter.shader_version = filter.shader.version_create(); + + for (int i = 0; i < FILTER_MODE_MAX; i++) { + filter.pipelines[i] = RD::get_singleton()->compute_pipeline_create(filter.shader.version_get_shader(filter.shader_version, i)); + } + + if (filter.use_high_quality) { + filter.coefficient_buffer = RD::get_singleton()->storage_buffer_create(sizeof(high_quality_coeffs)); + RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(high_quality_coeffs), &high_quality_coeffs[0]); + } else { + filter.coefficient_buffer = RD::get_singleton()->storage_buffer_create(sizeof(low_quality_coeffs)); + RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(low_quality_coeffs), &low_quality_coeffs[0]); + } + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(filter.coefficient_buffer); + uniforms.push_back(u); + } + filter.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, filter.shader.version_get_shader(filter.shader_version, filter.use_high_quality ? 0 : 1), 1); + } + + { + Vector<String> specular_modes; + specular_modes.push_back("\n#define MODE_MERGE\n"); + specular_modes.push_back("\n#define MODE_MERGE\n#define MODE_SSR\n"); + specular_modes.push_back("\n"); + specular_modes.push_back("\n#define MODE_SSR\n"); + + specular_merge.shader.initialize(specular_modes); + + specular_merge.shader_version = specular_merge.shader.version_create(); + + //use additive + + RD::PipelineColorBlendState::Attachment ba; + ba.enable_blend = true; + ba.src_color_blend_factor = RD::BLEND_FACTOR_ONE; + ba.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + ba.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + ba.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + ba.color_blend_op = RD::BLEND_OP_ADD; + ba.alpha_blend_op = RD::BLEND_OP_ADD; + + RD::PipelineColorBlendState blend_additive; + blend_additive.attachments.push_back(ba); + + for (int i = 0; i < SPECULAR_MERGE_MAX; i++) { + RD::PipelineColorBlendState blend_state; + if (i == SPECULAR_MERGE_ADDITIVE_ADD || i == SPECULAR_MERGE_ADDITIVE_SSR) { + blend_state = blend_additive; + } else { + blend_state = RD::PipelineColorBlendState::create_disabled(); + } + specular_merge.pipelines[i].setup(specular_merge.shader.version_get_shader(specular_merge.shader_version, i), RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); + } + } + + { + Vector<String> ssr_modes; + ssr_modes.push_back("\n"); + ssr_modes.push_back("\n#define MODE_ROUGH\n"); + + ssr.shader.initialize(ssr_modes); + + ssr.shader_version = ssr.shader.version_create(); + + for (int i = 0; i < SCREEN_SPACE_REFLECTION_MAX; i++) { + ssr.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssr.shader.version_get_shader(ssr.shader_version, i)); + } + } + + { + Vector<String> ssr_filter_modes; + ssr_filter_modes.push_back("\n"); + ssr_filter_modes.push_back("\n#define VERTICAL_PASS\n"); + + ssr_filter.shader.initialize(ssr_filter_modes); + + ssr_filter.shader_version = ssr_filter.shader.version_create(); + + for (int i = 0; i < SCREEN_SPACE_REFLECTION_FILTER_MAX; i++) { + ssr_filter.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssr_filter.shader.version_get_shader(ssr_filter.shader_version, i)); + } + } + + { + Vector<String> ssr_scale_modes; + ssr_scale_modes.push_back("\n"); + + ssr_scale.shader.initialize(ssr_scale_modes); + + ssr_scale.shader_version = ssr_scale.shader.version_create(); + + ssr_scale.pipeline = RD::get_singleton()->compute_pipeline_create(ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0)); + } + + { + Vector<String> sss_modes; + sss_modes.push_back("\n#define USE_11_SAMPLES\n"); + sss_modes.push_back("\n#define USE_17_SAMPLES\n"); + sss_modes.push_back("\n#define USE_25_SAMPLES\n"); + + sss.shader.initialize(sss_modes); + + sss.shader_version = sss.shader.version_create(); + + for (int i = 0; i < sss_modes.size(); i++) { + sss.pipelines[i] = RD::get_singleton()->compute_pipeline_create(sss.shader.version_get_shader(sss.shader_version, i)); + } + } + + { + Vector<String> resolve_modes; + resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n"); + resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n#define GIPROBE_RESOLVE\n"); + + resolve.shader.initialize(resolve_modes); + + resolve.shader_version = resolve.shader.version_create(); + + for (int i = 0; i < RESOLVE_MODE_MAX; i++) { + resolve.pipelines[i] = RD::get_singleton()->compute_pipeline_create(resolve.shader.version_get_shader(resolve.shader_version, i)); + } + } + + { + Vector<String> sort_modes; + sort_modes.push_back("\n#define MODE_SORT_BLOCK\n"); + sort_modes.push_back("\n#define MODE_SORT_STEP\n"); + sort_modes.push_back("\n#define MODE_SORT_INNER\n"); + + sort.shader.initialize(sort_modes); + + sort.shader_version = sort.shader.version_create(); + + for (int i = 0; i < SORT_MODE_MAX; i++) { + sort.pipelines[i] = RD::get_singleton()->compute_pipeline_create(sort.shader.version_get_shader(sort.shader_version, i)); + } + } + + RD::SamplerState sampler; + sampler.mag_filter = RD::SAMPLER_FILTER_LINEAR; + sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler.max_lod = 0; + + default_sampler = RD::get_singleton()->sampler_create(sampler); + RD::get_singleton()->set_resource_name(default_sampler, "Default Linear Sampler"); + + sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler.mip_filter = RD::SAMPLER_FILTER_LINEAR; + sampler.max_lod = 1e20; + + default_mipmap_sampler = RD::get_singleton()->sampler_create(sampler); + RD::get_singleton()->set_resource_name(default_mipmap_sampler, "Default MipMap Sampler"); + + { //create index array for copy shaders + Vector<uint8_t> pv; + pv.resize(6 * 4); + { + uint8_t *w = pv.ptrw(); + int *p32 = (int *)w; + p32[0] = 0; + p32[1] = 1; + p32[2] = 2; + p32[3] = 0; + p32[4] = 2; + p32[5] = 3; + } + index_buffer = RD::get_singleton()->index_buffer_create(6, RenderingDevice::INDEX_BUFFER_FORMAT_UINT32, pv); + index_array = RD::get_singleton()->index_array_create(index_buffer, 0, 6); + } +} + +EffectsRD::~EffectsRD() { + if (RD::get_singleton()->uniform_set_is_valid(filter.image_uniform_set)) { + RD::get_singleton()->free(filter.image_uniform_set); + } + + if (RD::get_singleton()->uniform_set_is_valid(filter.uniform_set)) { + RD::get_singleton()->free(filter.uniform_set); + } + + RD::get_singleton()->free(default_sampler); + RD::get_singleton()->free(default_mipmap_sampler); + RD::get_singleton()->free(index_buffer); //array gets freed as dependency + RD::get_singleton()->free(filter.coefficient_buffer); + + RD::get_singleton()->free(ssao.mirror_sampler); + RD::get_singleton()->free(ssao.gather_constants_buffer); + RD::get_singleton()->free(ssao.importance_map_load_counter); + + bokeh.shader.version_free(bokeh.shader_version); + copy.shader.version_free(copy.shader_version); + copy_to_fb.shader.version_free(copy_to_fb.shader_version); + cube_to_dp.shader.version_free(cube_to_dp.shader_version); + cubemap_downsampler.shader.version_free(cubemap_downsampler.shader_version); + filter.shader.version_free(filter.shader_version); + luminance_reduce.shader.version_free(luminance_reduce.shader_version); + resolve.shader.version_free(resolve.shader_version); + roughness.shader.version_free(roughness.shader_version); + roughness_limiter.shader.version_free(roughness_limiter.shader_version); + sort.shader.version_free(sort.shader_version); + specular_merge.shader.version_free(specular_merge.shader_version); + ssao.blur_shader.version_free(ssao.blur_shader_version); + ssao.gather_shader.version_free(ssao.gather_shader_version); + ssao.downsample_shader.version_free(ssao.downsample_shader_version); + ssao.interleave_shader.version_free(ssao.interleave_shader_version); + ssao.importance_map_shader.version_free(ssao.importance_map_shader_version); + ssr.shader.version_free(ssr.shader_version); + ssr_filter.shader.version_free(ssr_filter.shader_version); + ssr_scale.shader.version_free(ssr_scale.shader_version); + sss.shader.version_free(sss.shader_version); + tonemap.shader.version_free(tonemap.shader_version); +} diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h new file mode 100644 index 0000000000..1ba25e301b --- /dev/null +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -0,0 +1,761 @@ +/*************************************************************************/ +/* effects_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef EFFECTS_RD_H +#define EFFECTS_RD_H + +#include "core/math/camera_matrix.h" +#include "servers/rendering/renderer_rd/pipeline_cache_rd.h" +#include "servers/rendering/renderer_rd/shaders/bokeh_dof.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/copy.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/copy_to_fb.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cube_to_dp.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cubemap_filter.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/luminance_reduce.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/resolve.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/roughness_limiter.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/sort.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/specular_merge.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao_blur.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao_downsample.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao_interleave.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/tonemap.glsl.gen.h" + +#include "servers/rendering_server.h" + +class EffectsRD { + enum CopyMode { + COPY_MODE_GAUSSIAN_COPY, + COPY_MODE_GAUSSIAN_COPY_8BIT, + COPY_MODE_GAUSSIAN_GLOW, + COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE, + COPY_MODE_SIMPLY_COPY, + COPY_MODE_SIMPLY_COPY_8BIT, + COPY_MODE_SIMPLY_COPY_DEPTH, + COPY_MODE_SET_COLOR, + COPY_MODE_SET_COLOR_8BIT, + COPY_MODE_MIPMAP, + COPY_MODE_LINEARIZE_DEPTH, + COPY_MODE_CUBE_TO_PANORAMA, + COPY_MODE_CUBE_ARRAY_TO_PANORAMA, + COPY_MODE_MAX, + + }; + + enum { + COPY_FLAG_HORIZONTAL = (1 << 0), + COPY_FLAG_USE_COPY_SECTION = (1 << 1), + COPY_FLAG_USE_ORTHOGONAL_PROJECTION = (1 << 2), + COPY_FLAG_DOF_NEAR_FIRST_TAP = (1 << 3), + COPY_FLAG_GLOW_FIRST_PASS = (1 << 4), + COPY_FLAG_FLIP_Y = (1 << 5), + COPY_FLAG_FORCE_LUMINANCE = (1 << 6), + COPY_FLAG_ALL_SOURCE = (1 << 7), + COPY_FLAG_HIGH_QUALITY_GLOW = (1 << 8), + COPY_FLAG_ALPHA_TO_ONE = (1 << 9), + }; + + struct CopyPushConstant { + int32_t section[4]; + int32_t target[2]; + uint32_t flags; + uint32_t pad; + // Glow. + float glow_strength; + float glow_bloom; + float glow_hdr_threshold; + float glow_hdr_scale; + + float glow_exposure; + float glow_white; + float glow_luminance_cap; + float glow_auto_exposure_grey; + // DOF. + float camera_z_far; + float camera_z_near; + uint32_t pad2[2]; + //SET color + float set_color[4]; + }; + + struct Copy { + CopyPushConstant push_constant; + CopyShaderRD shader; + RID shader_version; + RID pipelines[COPY_MODE_MAX]; + + } copy; + + enum CopyToFBMode { + COPY_TO_FB_COPY, + COPY_TO_FB_COPY_PANORAMA_TO_DP, + COPY_TO_FB_COPY2, + COPY_TO_FB_MAX, + + }; + + struct CopyToFbPushConstant { + float section[4]; + float pixel_size[2]; + uint32_t flip_y; + uint32_t use_section; + + uint32_t force_luminance; + uint32_t alpha_to_zero; + uint32_t srgb; + uint32_t pad; + }; + + struct CopyToFb { + CopyToFbPushConstant push_constant; + CopyToFbShaderRD shader; + RID shader_version; + PipelineCacheRD pipelines[COPY_TO_FB_MAX]; + + } copy_to_fb; + + struct CubemapRoughnessPushConstant { + uint32_t face_id; + uint32_t sample_count; + float roughness; + uint32_t use_direct_write; + float face_size; + float pad[3]; + }; + + struct CubemapRoughness { + CubemapRoughnessPushConstant push_constant; + CubemapRoughnessShaderRD shader; + RID shader_version; + RID pipeline; + } roughness; + + enum TonemapMode { + TONEMAP_MODE_NORMAL, + TONEMAP_MODE_BICUBIC_GLOW_FILTER, + TONEMAP_MODE_1D_LUT, + TONEMAP_MODE_BICUBIC_GLOW_FILTER_1D_LUT, + TONEMAP_MODE_MAX + }; + + struct TonemapPushConstant { + float bcs[3]; + uint32_t use_bcs; + + uint32_t use_glow; + uint32_t use_auto_exposure; + uint32_t use_color_correction; + uint32_t tonemapper; + + uint32_t glow_texture_size[2]; + float glow_intensity; + uint32_t pad3; + + uint32_t glow_mode; + float glow_levels[7]; + + float exposure; + float white; + float auto_exposure_grey; + uint32_t pad2; + + float pixel_size[2]; + uint32_t use_fxaa; + uint32_t use_debanding; + }; + + /* tonemap actually writes to a framebuffer, which is + * better to do using the raster pipeline rather than + * compute, as that framebuffer might be in different formats + */ + struct Tonemap { + TonemapPushConstant push_constant; + TonemapShaderRD shader; + RID shader_version; + PipelineCacheRD pipelines[TONEMAP_MODE_MAX]; + } tonemap; + + enum LuminanceReduceMode { + LUMINANCE_REDUCE_READ, + LUMINANCE_REDUCE, + LUMINANCE_REDUCE_WRITE, + LUMINANCE_REDUCE_MAX + }; + + struct LuminanceReducePushConstant { + int32_t source_size[2]; + float max_luminance; + float min_luminance; + float exposure_adjust; + float pad[3]; + }; + + struct LuminanceReduce { + LuminanceReducePushConstant push_constant; + LuminanceReduceShaderRD shader; + RID shader_version; + RID pipelines[LUMINANCE_REDUCE_MAX]; + } luminance_reduce; + + struct CopyToDPPushConstant { + float z_far; + float z_near; + uint32_t z_flip; + uint32_t pad; + float screen_rect[4]; + }; + + struct CoptToDP { + CubeToDpShaderRD shader; + RID shader_version; + PipelineCacheRD pipeline; + } cube_to_dp; + + struct BokehPushConstant { + uint32_t size[2]; + float z_far; + float z_near; + + uint32_t orthogonal; + float blur_size; + float blur_scale; + uint32_t steps; + + uint32_t blur_near_active; + float blur_near_begin; + float blur_near_end; + uint32_t blur_far_active; + + float blur_far_begin; + float blur_far_end; + uint32_t second_pass; + uint32_t half_size; + + uint32_t use_jitter; + float jitter_seed; + uint32_t pad[2]; + }; + + enum BokehMode { + BOKEH_GEN_BLUR_SIZE, + BOKEH_GEN_BOKEH_BOX, + BOKEH_GEN_BOKEH_HEXAGONAL, + BOKEH_GEN_BOKEH_CIRCULAR, + BOKEH_COMPOSITE, + BOKEH_MAX + }; + + struct Bokeh { + BokehPushConstant push_constant; + BokehDofShaderRD shader; + RID shader_version; + RID pipelines[BOKEH_MAX]; + } bokeh; + + enum SSAOMode { + SSAO_DOWNSAMPLE, + SSAO_DOWNSAMPLE_HALF_RES, + SSAO_DOWNSAMPLE_MIPMAP, + SSAO_DOWNSAMPLE_MIPMAP_HALF_RES, + SSAO_DOWNSAMPLE_HALF, + SSAO_DOWNSAMPLE_HALF_RES_HALF, + SSAO_GATHER, + SSAO_GATHER_BASE, + SSAO_GATHER_ADAPTIVE, + SSAO_GENERATE_IMPORTANCE_MAP, + SSAO_PROCESS_IMPORTANCE_MAPA, + SSAO_PROCESS_IMPORTANCE_MAPB, + SSAO_BLUR_PASS, + SSAO_BLUR_PASS_SMART, + SSAO_BLUR_PASS_WIDE, + SSAO_INTERLEAVE, + SSAO_INTERLEAVE_SMART, + SSAO_INTERLEAVE_HALF, + SSAO_MAX + }; + + struct SSAODownsamplePushConstant { + float pixel_size[2]; + float z_far; + float z_near; + uint32_t orthogonal; + float radius_sq; + uint32_t pad[2]; + }; + + struct SSAOGatherPushConstant { + int32_t screen_size[2]; + int pass; + int quality; + + float half_screen_pixel_size[2]; + int size_multiplier; + float detail_intensity; + + float NDC_to_view_mul[2]; + float NDC_to_view_add[2]; + + float pad[2]; + float half_screen_pixel_size_x025[2]; + + float radius; + float intensity; + float shadow_power; + float shadow_clamp; + + float fade_out_mul; + float fade_out_add; + float horizon_angle_threshold; + float inv_radius_near_limit; + + bool is_orthogonal; + float neg_inv_radius; + float load_counter_avg_div; + float adaptive_sample_limit; + + int32_t pass_coord_offset[2]; + float pass_uv_offset[2]; + }; + + struct SSAOGatherConstants { + float rotation_matrices[80]; //5 vec4s * 4 + }; + + struct SSAOImportanceMapPushConstant { + float half_screen_pixel_size[2]; + float intensity; + float power; + }; + + struct SSAOBlurPushConstant { + float edge_sharpness; + float pad; + float half_screen_pixel_size[2]; + }; + + struct SSAOInterleavePushConstant { + float inv_sharpness; + uint32_t size_modifier; + float pixel_size[2]; + }; + + struct SSAO { + SSAODownsamplePushConstant downsample_push_constant; + SsaoDownsampleShaderRD downsample_shader; + RID downsample_shader_version; + RID downsample_uniform_set; + + SSAOGatherPushConstant gather_push_constant; + SsaoShaderRD gather_shader; + RID gather_shader_version; + RID gather_uniform_set; + RID gather_constants_buffer; + bool gather_initialized = false; + + SSAOImportanceMapPushConstant importance_map_push_constant; + SsaoImportanceMapShaderRD importance_map_shader; + RID importance_map_shader_version; + RID importance_map_load_counter; + RID importance_map_uniform_set; + RID counter_uniform_set; + + SSAOBlurPushConstant blur_push_constant; + SsaoBlurShaderRD blur_shader; + RID blur_shader_version; + + SSAOInterleavePushConstant interleave_push_constant; + SsaoInterleaveShaderRD interleave_shader; + RID interleave_shader_version; + + RID mirror_sampler; + RID pipelines[SSAO_MAX]; + } ssao; + + struct RoughnessLimiterPushConstant { + int32_t screen_size[2]; + float curve; + uint32_t pad; + }; + + struct RoughnessLimiter { + RoughnessLimiterPushConstant push_constant; + RoughnessLimiterShaderRD shader; + RID shader_version; + RID pipeline; + + } roughness_limiter; + + struct CubemapDownsamplerPushConstant { + uint32_t face_size; + float pad[3]; + }; + + struct CubemapDownsampler { + CubemapDownsamplerPushConstant push_constant; + CubemapDownsamplerShaderRD shader; + RID shader_version; + RID pipeline; + + } cubemap_downsampler; + + enum CubemapFilterMode { + FILTER_MODE_HIGH_QUALITY, + FILTER_MODE_LOW_QUALITY, + FILTER_MODE_HIGH_QUALITY_ARRAY, + FILTER_MODE_LOW_QUALITY_ARRAY, + FILTER_MODE_MAX, + }; + + struct CubemapFilter { + CubemapFilterShaderRD shader; + RID shader_version; + RID pipelines[FILTER_MODE_MAX]; + RID uniform_set; + RID image_uniform_set; + RID coefficient_buffer; + bool use_high_quality; + + } filter; + + struct SkyPushConstant { + float orientation[12]; + float proj[4]; + float position[3]; + float multiplier; + float time; + float pad[3]; + }; + + enum SpecularMergeMode { + SPECULAR_MERGE_ADD, + SPECULAR_MERGE_SSR, + SPECULAR_MERGE_ADDITIVE_ADD, + SPECULAR_MERGE_ADDITIVE_SSR, + SPECULAR_MERGE_MAX + }; + + /* Specular merge must be done using raster, rather than compute + * because it must continue the existing color buffer + */ + + struct SpecularMerge { + SpecularMergeShaderRD shader; + RID shader_version; + PipelineCacheRD pipelines[SPECULAR_MERGE_MAX]; + + } specular_merge; + + enum ScreenSpaceReflectionMode { + SCREEN_SPACE_REFLECTION_NORMAL, + SCREEN_SPACE_REFLECTION_ROUGH, + SCREEN_SPACE_REFLECTION_MAX, + }; + + struct ScreenSpaceReflectionPushConstant { + float proj_info[4]; + + int32_t screen_size[2]; + float camera_z_near; + float camera_z_far; + + int32_t num_steps; + float depth_tolerance; + float distance_fade; + float curve_fade_in; + + uint32_t orthogonal; + float filter_mipmap_levels; + uint32_t use_half_res; + uint8_t metallic_mask[4]; + + float projection[16]; + }; + + struct ScreenSpaceReflection { + ScreenSpaceReflectionPushConstant push_constant; + ScreenSpaceReflectionShaderRD shader; + RID shader_version; + RID pipelines[SCREEN_SPACE_REFLECTION_MAX]; + + } ssr; + + struct ScreenSpaceReflectionFilterPushConstant { + float proj_info[4]; + + uint32_t orthogonal; + float edge_tolerance; + int32_t increment; + uint32_t pad; + + int32_t screen_size[2]; + uint32_t vertical; + uint32_t steps; + }; + enum { + SCREEN_SPACE_REFLECTION_FILTER_HORIZONTAL, + SCREEN_SPACE_REFLECTION_FILTER_VERTICAL, + SCREEN_SPACE_REFLECTION_FILTER_MAX, + }; + + struct ScreenSpaceReflectionFilter { + ScreenSpaceReflectionFilterPushConstant push_constant; + ScreenSpaceReflectionFilterShaderRD shader; + RID shader_version; + RID pipelines[SCREEN_SPACE_REFLECTION_FILTER_MAX]; + } ssr_filter; + + struct ScreenSpaceReflectionScalePushConstant { + int32_t screen_size[2]; + float camera_z_near; + float camera_z_far; + + uint32_t orthogonal; + uint32_t filter; + uint32_t pad[2]; + }; + + struct ScreenSpaceReflectionScale { + ScreenSpaceReflectionScalePushConstant push_constant; + ScreenSpaceReflectionScaleShaderRD shader; + RID shader_version; + RID pipeline; + } ssr_scale; + + struct SubSurfaceScatteringPushConstant { + int32_t screen_size[2]; + float camera_z_far; + float camera_z_near; + + uint32_t vertical; + uint32_t orthogonal; + float unit_size; + float scale; + + float depth_scale; + uint32_t pad[3]; + }; + + struct SubSurfaceScattering { + SubSurfaceScatteringPushConstant push_constant; + SubsurfaceScatteringShaderRD shader; + RID shader_version; + RID pipelines[3]; //3 quality levels + } sss; + + struct ResolvePushConstant { + int32_t screen_size[2]; + int32_t samples; + uint32_t pad; + }; + + enum ResolveMode { + RESOLVE_MODE_GI, + RESOLVE_MODE_GI_GIPROBE, + RESOLVE_MODE_MAX + }; + + struct Resolve { + ResolvePushConstant push_constant; + ResolveShaderRD shader; + RID shader_version; + RID pipelines[RESOLVE_MODE_MAX]; //3 quality levels + } resolve; + + enum SortMode { + SORT_MODE_BLOCK, + SORT_MODE_STEP, + SORT_MODE_INNER, + SORT_MODE_MAX + }; + + struct Sort { + struct PushConstant { + uint32_t total_elements; + uint32_t pad[3]; + int32_t job_params[4]; + }; + + SortShaderRD shader; + RID shader_version; + RID pipelines[SORT_MODE_MAX]; + } sort; + + RID default_sampler; + RID default_mipmap_sampler; + RID index_buffer; + RID index_array; + + Map<RID, RID> texture_to_uniform_set_cache; + + Map<RID, RID> image_to_uniform_set_cache; + + struct TexturePair { + RID texture1; + RID texture2; + _FORCE_INLINE_ bool operator<(const TexturePair &p_pair) const { + if (texture1 == p_pair.texture1) { + return texture2 < p_pair.texture2; + } else { + return texture1 < p_pair.texture1; + } + } + }; + + struct TextureSamplerPair { + RID texture; + RID sampler; + _FORCE_INLINE_ bool operator<(const TextureSamplerPair &p_pair) const { + if (texture == p_pair.texture) { + return sampler < p_pair.sampler; + } else { + return texture < p_pair.texture; + } + } + }; + + Map<RID, RID> texture_to_compute_uniform_set_cache; + Map<TexturePair, RID> texture_pair_to_compute_uniform_set_cache; + Map<TexturePair, RID> image_pair_to_compute_uniform_set_cache; + Map<TextureSamplerPair, RID> texture_sampler_to_compute_uniform_set_cache; + + RID _get_uniform_set_from_image(RID p_texture); + RID _get_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps = false); + RID _get_compute_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps = false); + RID _get_compute_uniform_set_from_texture_and_sampler(RID p_texture, RID p_sampler); + RID _get_compute_uniform_set_from_texture_pair(RID p_texture, RID p_texture2, bool p_use_mipmaps = false); + RID _get_compute_uniform_set_from_image_pair(RID p_texture, RID p_texture2); + +public: + void copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false, bool p_force_luminance = false, bool p_alpha_to_zero = false, bool p_srgb = false, RID p_secondary = RID()); + void copy_to_rect(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y = false, bool p_force_luminance = false, bool p_all_source = false, bool p_8_bit_dst = false, bool p_alpha_to_one = false); + void copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama, const Size2i &p_panorama_size, float p_lod, bool p_is_array); + void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false); + void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far); + void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false); + void gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst = false); + void set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst = false); + void gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const Size2i &p_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_treshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0); + + void cubemap_roughness(RID p_source_rd_texture, RID p_dest_framebuffer, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size); + void make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const Size2i &p_size); + void copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dest_texture, const Rect2 &p_rect, float p_z_near, float p_z_far, bool p_dp_flip); + void luminance_reduction(RID p_source_texture, const Size2i p_source_size, const Vector<RID> p_reduce, RID p_prev_luminance, float p_min_luminance, float p_max_luminance, float p_adjust, bool p_set = false); + void bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i &p_base_texture_size, RID p_secondary_texture, RID p_bokeh_texture1, RID p_bokeh_texture2, bool p_dof_far, float p_dof_far_begin, float p_dof_far_size, bool p_dof_near, float p_dof_near_begin, float p_dof_near_size, float p_bokeh_size, RS::DOFBokehShape p_bokeh_shape, RS::DOFBlurQuality p_quality, bool p_use_jitter, float p_cam_znear, float p_cam_zfar, bool p_cam_orthogonal); + + struct TonemapSettings { + bool use_glow = false; + enum GlowMode { + GLOW_MODE_ADD, + GLOW_MODE_SCREEN, + GLOW_MODE_SOFTLIGHT, + GLOW_MODE_REPLACE, + GLOW_MODE_MIX + }; + + GlowMode glow_mode = GLOW_MODE_ADD; + float glow_intensity = 1.0; + float glow_levels[7] = { 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0 }; + Vector2i glow_texture_size; + bool glow_use_bicubic_upscale = false; + RID glow_texture; + + RS::EnvironmentToneMapper tonemap_mode = RS::ENV_TONE_MAPPER_LINEAR; + float exposure = 1.0; + float white = 1.0; + + bool use_auto_exposure = false; + float auto_exposure_grey = 0.5; + RID exposure_texture; + + bool use_bcs = false; + float brightness = 1.0; + float contrast = 1.0; + float saturation = 1.0; + + bool use_color_correction = false; + bool use_1d_color_correction = false; + RID color_correction_texture; + + bool use_fxaa = false; + bool use_debanding = false; + Vector2i texture_size; + }; + + struct SSAOSettings { + float radius = 1.0; + float intensity = 2.0; + float power = 1.5; + float detail = 0.5; + float horizon = 0.06; + float sharpness = 0.98; + + RS::EnvironmentSSAOQuality quality = RS::ENV_SSAO_QUALITY_MEDIUM; + bool half_size = false; + float adaptive_target = 0.5; + int blur_passes = 2; + float fadeout_from = 50.0; + float fadeout_to = 300.0; + + Size2i full_screen_size = Size2i(); + Size2i half_screen_size = Size2i(); + Size2i quarter_screen_size = Size2i(); + }; + + void tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings); + + void gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID> p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass); + void generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao, const Vector<RID> p_ao_slices, RID p_ao_pong, const Vector<RID> p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets); + + void roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve); + void cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size); + void cubemap_filter(RID p_source_cubemap, Vector<RID> p_dest_cubemap, bool p_use_array); + void render_sky(RD::DrawListID p_list, float p_time, RID p_fb, RID p_samplers, RID p_fog, PipelineCacheRD *p_pipeline, RID p_uniform_set, RID p_texture_set, const CameraMatrix &p_camera, const Basis &p_orientation, float p_multiplier, const Vector3 &p_position); + + void screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RS::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera); + void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection); + void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); + + void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); + + void sort_buffer(RID p_uniform_set, int p_size); + + EffectsRD(); + ~EffectsRD(); +}; + +#endif // !RASTERIZER_EFFECTS_RD_H diff --git a/servers/rendering/renderer_rd/pipeline_cache_rd.cpp b/servers/rendering/renderer_rd/pipeline_cache_rd.cpp new file mode 100644 index 0000000000..b2b919c40e --- /dev/null +++ b/servers/rendering/renderer_rd/pipeline_cache_rd.cpp @@ -0,0 +1,99 @@ +/*************************************************************************/ +/* pipeline_cache_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "pipeline_cache_rd.h" +#include "core/os/memory.h" + +RID PipelineCacheRD::_generate_version(RD::VertexFormatID p_vertex_format_id, RD::FramebufferFormatID p_framebuffer_format_id, bool p_wireframe) { + RD::PipelineMultisampleState multisample_state_version = multisample_state; + multisample_state_version.sample_count = RD::get_singleton()->framebuffer_format_get_texture_samples(p_framebuffer_format_id); + + RD::PipelineRasterizationState raster_state_version = rasterization_state; + raster_state_version.wireframe = p_wireframe; + + RID pipeline = RD::get_singleton()->render_pipeline_create(shader, p_framebuffer_format_id, p_vertex_format_id, render_primitive, raster_state_version, multisample_state_version, depth_stencil_state, blend_state, dynamic_state_flags); + ERR_FAIL_COND_V(pipeline.is_null(), RID()); + versions = (Version *)memrealloc(versions, sizeof(Version) * (version_count + 1)); + versions[version_count].framebuffer_id = p_framebuffer_format_id; + versions[version_count].vertex_id = p_vertex_format_id; + versions[version_count].wireframe = p_wireframe; + versions[version_count].pipeline = pipeline; + version_count++; + return pipeline; +} + +void PipelineCacheRD::_clear() { + if (versions) { + for (uint32_t i = 0; i < version_count; i++) { + //shader may be gone, so this may not be valid + if (RD::get_singleton()->render_pipeline_is_valid(versions[i].pipeline)) { + RD::get_singleton()->free(versions[i].pipeline); + } + } + version_count = 0; + memfree(versions); + versions = nullptr; + } +} + +void PipelineCacheRD::setup(RID p_shader, RD::RenderPrimitive p_primitive, const RD::PipelineRasterizationState &p_rasterization_state, RD::PipelineMultisampleState p_multisample, const RD::PipelineDepthStencilState &p_depth_stencil_state, const RD::PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags) { + ERR_FAIL_COND(p_shader.is_null()); + _clear(); + shader = p_shader; + input_mask = RD::get_singleton()->shader_get_vertex_input_attribute_mask(p_shader); + render_primitive = p_primitive; + rasterization_state = p_rasterization_state; + multisample_state = p_multisample; + depth_stencil_state = p_depth_stencil_state; + blend_state = p_blend_state; + dynamic_state_flags = p_dynamic_state_flags; +} + +void PipelineCacheRD::update_shader(RID p_shader) { + ERR_FAIL_COND(p_shader.is_null()); + _clear(); + setup(p_shader, render_primitive, rasterization_state, multisample_state, depth_stencil_state, blend_state, dynamic_state_flags); +} + +void PipelineCacheRD::clear() { + _clear(); + shader = RID(); //clear shader + input_mask = 0; +} + +PipelineCacheRD::PipelineCacheRD() { + version_count = 0; + versions = nullptr; + input_mask = 0; +} + +PipelineCacheRD::~PipelineCacheRD() { + _clear(); +} diff --git a/servers/rendering/renderer_rd/pipeline_cache_rd.h b/servers/rendering/renderer_rd/pipeline_cache_rd.h new file mode 100644 index 0000000000..b1c8f21ecc --- /dev/null +++ b/servers/rendering/renderer_rd/pipeline_cache_rd.h @@ -0,0 +1,96 @@ +/*************************************************************************/ +/* pipeline_cache_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef PIPELINE_CACHE_RD_H +#define PIPELINE_CACHE_RD_H + +#include "core/os/spin_lock.h" +#include "servers/rendering/rendering_device.h" + +class PipelineCacheRD { + SpinLock spin_lock; + + RID shader; + uint32_t input_mask; + + RD::RenderPrimitive render_primitive; + RD::PipelineRasterizationState rasterization_state; + RD::PipelineMultisampleState multisample_state; + RD::PipelineDepthStencilState depth_stencil_state; + RD::PipelineColorBlendState blend_state; + int dynamic_state_flags; + + struct Version { + RD::VertexFormatID vertex_id; + RD::FramebufferFormatID framebuffer_id; + bool wireframe; + RID pipeline; + }; + + Version *versions; + uint32_t version_count; + + RID _generate_version(RD::VertexFormatID p_vertex_format_id, RD::FramebufferFormatID p_framebuffer_format_id, bool p_wireframe); + + void _clear(); + +public: + void setup(RID p_shader, RD::RenderPrimitive p_primitive, const RD::PipelineRasterizationState &p_rasterization_state, RD::PipelineMultisampleState p_multisample, const RD::PipelineDepthStencilState &p_depth_stencil_state, const RD::PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0); + void update_shader(RID p_shader); + + _FORCE_INLINE_ RID get_render_pipeline(RD::VertexFormatID p_vertex_format_id, RD::FramebufferFormatID p_framebuffer_format_id, bool p_wireframe = false) { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_V_MSG(shader.is_null(), RID(), + "Attempted to use an unused shader variant (shader is null),"); +#endif + + spin_lock.lock(); + RID result; + for (uint32_t i = 0; i < version_count; i++) { + if (versions[i].vertex_id == p_vertex_format_id && versions[i].framebuffer_id == p_framebuffer_format_id && versions[i].wireframe == p_wireframe) { + result = versions[i].pipeline; + spin_lock.unlock(); + return result; + } + } + result = _generate_version(p_vertex_format_id, p_framebuffer_format_id, p_wireframe); + spin_lock.unlock(); + return result; + } + + _FORCE_INLINE_ uint32_t get_vertex_input_mask() const { + return input_mask; + } + void clear(); + PipelineCacheRD(); + ~PipelineCacheRD(); +}; + +#endif // RENDER_PIPELINE_CACHE_RD_H diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp new file mode 100644 index 0000000000..7d6e2fa8e4 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -0,0 +1,2808 @@ +/*************************************************************************/ +/* renderer_canvas_render_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "renderer_canvas_render_rd.h" +#include "core/config/project_settings.h" +#include "core/math/geometry_2d.h" +#include "core/math/math_funcs.h" +#include "renderer_compositor_rd.h" + +void RendererCanvasRenderRD::_update_transform_2d_to_mat4(const Transform2D &p_transform, float *p_mat4) { + p_mat4[0] = p_transform.elements[0][0]; + p_mat4[1] = p_transform.elements[0][1]; + p_mat4[2] = 0; + p_mat4[3] = 0; + p_mat4[4] = p_transform.elements[1][0]; + p_mat4[5] = p_transform.elements[1][1]; + p_mat4[6] = 0; + p_mat4[7] = 0; + p_mat4[8] = 0; + p_mat4[9] = 0; + p_mat4[10] = 1; + p_mat4[11] = 0; + p_mat4[12] = p_transform.elements[2][0]; + p_mat4[13] = p_transform.elements[2][1]; + p_mat4[14] = 0; + p_mat4[15] = 1; +} + +void RendererCanvasRenderRD::_update_transform_2d_to_mat2x4(const Transform2D &p_transform, float *p_mat2x4) { + p_mat2x4[0] = p_transform.elements[0][0]; + p_mat2x4[1] = p_transform.elements[1][0]; + p_mat2x4[2] = 0; + p_mat2x4[3] = p_transform.elements[2][0]; + + p_mat2x4[4] = p_transform.elements[0][1]; + p_mat2x4[5] = p_transform.elements[1][1]; + p_mat2x4[6] = 0; + p_mat2x4[7] = p_transform.elements[2][1]; +} + +void RendererCanvasRenderRD::_update_transform_2d_to_mat2x3(const Transform2D &p_transform, float *p_mat2x3) { + p_mat2x3[0] = p_transform.elements[0][0]; + p_mat2x3[1] = p_transform.elements[0][1]; + p_mat2x3[2] = p_transform.elements[1][0]; + p_mat2x3[3] = p_transform.elements[1][1]; + p_mat2x3[4] = p_transform.elements[2][0]; + p_mat2x3[5] = p_transform.elements[2][1]; +} + +void RendererCanvasRenderRD::_update_transform_to_mat4(const Transform &p_transform, float *p_mat4) { + p_mat4[0] = p_transform.basis.elements[0][0]; + p_mat4[1] = p_transform.basis.elements[1][0]; + p_mat4[2] = p_transform.basis.elements[2][0]; + p_mat4[3] = 0; + p_mat4[4] = p_transform.basis.elements[0][1]; + p_mat4[5] = p_transform.basis.elements[1][1]; + p_mat4[6] = p_transform.basis.elements[2][1]; + p_mat4[7] = 0; + p_mat4[8] = p_transform.basis.elements[0][2]; + p_mat4[9] = p_transform.basis.elements[1][2]; + p_mat4[10] = p_transform.basis.elements[2][2]; + p_mat4[11] = 0; + p_mat4[12] = p_transform.origin.x; + p_mat4[13] = p_transform.origin.y; + p_mat4[14] = p_transform.origin.z; + p_mat4[15] = 1; +} + +RendererCanvasRender::PolygonID RendererCanvasRenderRD::request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs, const Vector<int> &p_bones, const Vector<float> &p_weights) { + // Care must be taken to generate array formats + // in ways where they could be reused, so we will + // put single-occuring elements first, and repeated + // elements later. This way the generated formats are + // the same no matter the length of the arrays. + // This dramatically reduces the amount of pipeline objects + // that need to be created for these formats. + + uint32_t vertex_count = p_points.size(); + uint32_t stride = 2; //vertices always repeat + if ((uint32_t)p_colors.size() == vertex_count || p_colors.size() == 1) { + stride += 4; + } + if ((uint32_t)p_uvs.size() == vertex_count) { + stride += 2; + } + if ((uint32_t)p_bones.size() == vertex_count * 4 && (uint32_t)p_weights.size() == vertex_count * 4) { + stride += 4; + } + + uint32_t buffer_size = stride * p_points.size(); + + Vector<uint8_t> polygon_buffer; + polygon_buffer.resize(buffer_size * sizeof(float)); + Vector<RD::VertexAttribute> descriptions; + descriptions.resize(5); + Vector<RID> buffers; + buffers.resize(5); + + { + const uint8_t *r = polygon_buffer.ptr(); + float *fptr = (float *)r; + uint32_t *uptr = (uint32_t *)r; + uint32_t base_offset = 0; + { //vertices + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + vd.offset = base_offset * sizeof(float); + vd.location = RS::ARRAY_VERTEX; + vd.stride = stride * sizeof(float); + + descriptions.write[0] = vd; + + const Vector2 *points_ptr = p_points.ptr(); + + for (uint32_t i = 0; i < vertex_count; i++) { + fptr[base_offset + i * stride + 0] = points_ptr[i].x; + fptr[base_offset + i * stride + 1] = points_ptr[i].y; + } + + base_offset += 2; + } + + //colors + if ((uint32_t)p_colors.size() == vertex_count || p_colors.size() == 1) { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + vd.offset = base_offset * sizeof(float); + vd.location = RS::ARRAY_COLOR; + vd.stride = stride * sizeof(float); + + descriptions.write[1] = vd; + + if (p_colors.size() == 1) { + Color color = p_colors[0]; + for (uint32_t i = 0; i < vertex_count; i++) { + fptr[base_offset + i * stride + 0] = color.r; + fptr[base_offset + i * stride + 1] = color.g; + fptr[base_offset + i * stride + 2] = color.b; + fptr[base_offset + i * stride + 3] = color.a; + } + } else { + const Color *color_ptr = p_colors.ptr(); + + for (uint32_t i = 0; i < vertex_count; i++) { + fptr[base_offset + i * stride + 0] = color_ptr[i].r; + fptr[base_offset + i * stride + 1] = color_ptr[i].g; + fptr[base_offset + i * stride + 2] = color_ptr[i].b; + fptr[base_offset + i * stride + 3] = color_ptr[i].a; + } + } + base_offset += 4; + } else { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + vd.offset = 0; + vd.location = RS::ARRAY_COLOR; + vd.stride = 0; + + descriptions.write[1] = vd; + buffers.write[1] = storage->mesh_get_default_rd_buffer(RendererStorageRD::DEFAULT_RD_BUFFER_COLOR); + } + + //uvs + if ((uint32_t)p_uvs.size() == vertex_count) { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + vd.offset = base_offset * sizeof(float); + vd.location = RS::ARRAY_TEX_UV; + vd.stride = stride * sizeof(float); + + descriptions.write[2] = vd; + + const Vector2 *uv_ptr = p_uvs.ptr(); + + for (uint32_t i = 0; i < vertex_count; i++) { + fptr[base_offset + i * stride + 0] = uv_ptr[i].x; + fptr[base_offset + i * stride + 1] = uv_ptr[i].y; + } + base_offset += 2; + } else { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + vd.offset = 0; + vd.location = RS::ARRAY_TEX_UV; + vd.stride = 0; + + descriptions.write[2] = vd; + buffers.write[2] = storage->mesh_get_default_rd_buffer(RendererStorageRD::DEFAULT_RD_BUFFER_TEX_UV); + } + + //bones + if ((uint32_t)p_indices.size() == vertex_count * 4 && (uint32_t)p_weights.size() == vertex_count * 4) { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R16G16B16A16_UINT; + vd.offset = base_offset * sizeof(float); + vd.location = RS::ARRAY_BONES; + vd.stride = stride * sizeof(float); + + descriptions.write[3] = vd; + + const int *bone_ptr = p_bones.ptr(); + + for (uint32_t i = 0; i < vertex_count; i++) { + uint16_t *bone16w = (uint16_t *)&uptr[base_offset + i * stride]; + + bone16w[0] = bone_ptr[i * 4 + 0]; + bone16w[1] = bone_ptr[i * 4 + 1]; + bone16w[2] = bone_ptr[i * 4 + 2]; + bone16w[3] = bone_ptr[i * 4 + 3]; + } + + base_offset += 2; + } else { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32B32A32_UINT; + vd.offset = 0; + vd.location = RS::ARRAY_BONES; + vd.stride = 0; + + descriptions.write[3] = vd; + buffers.write[3] = storage->mesh_get_default_rd_buffer(RendererStorageRD::DEFAULT_RD_BUFFER_BONES); + } + + //weights + if ((uint32_t)p_weights.size() == vertex_count * 4) { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R16G16B16A16_UNORM; + vd.offset = base_offset * sizeof(float); + vd.location = RS::ARRAY_WEIGHTS; + vd.stride = stride * sizeof(float); + + descriptions.write[4] = vd; + + const float *weight_ptr = p_weights.ptr(); + + for (uint32_t i = 0; i < vertex_count; i++) { + uint16_t *weight16w = (uint16_t *)&uptr[base_offset + i * stride]; + + weight16w[0] = CLAMP(weight_ptr[i * 4 + 0] * 65535, 0, 65535); + weight16w[1] = CLAMP(weight_ptr[i * 4 + 1] * 65535, 0, 65535); + weight16w[2] = CLAMP(weight_ptr[i * 4 + 2] * 65535, 0, 65535); + weight16w[3] = CLAMP(weight_ptr[i * 4 + 3] * 65535, 0, 65535); + } + + base_offset += 2; + } else { + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + vd.offset = 0; + vd.location = RS::ARRAY_WEIGHTS; + vd.stride = 0; + + descriptions.write[4] = vd; + buffers.write[4] = storage->mesh_get_default_rd_buffer(RendererStorageRD::DEFAULT_RD_BUFFER_BONES); + } + + //check that everything is as it should be + ERR_FAIL_COND_V(base_offset != stride, 0); //bug + } + + RD::VertexFormatID vertex_id = RD::get_singleton()->vertex_format_create(descriptions); + ERR_FAIL_COND_V(vertex_id == RD::INVALID_ID, 0); + + PolygonBuffers pb; + pb.vertex_buffer = RD::get_singleton()->vertex_buffer_create(polygon_buffer.size(), polygon_buffer); + for (int i = 0; i < descriptions.size(); i++) { + if (buffers[i] == RID()) { //if put in vertex, use as vertex + buffers.write[i] = pb.vertex_buffer; + } + } + + pb.vertex_array = RD::get_singleton()->vertex_array_create(p_points.size(), vertex_id, buffers); + + if (p_indices.size()) { + //create indices, as indices were requested + Vector<uint8_t> index_buffer; + index_buffer.resize(p_indices.size() * sizeof(int32_t)); + { + uint8_t *w = index_buffer.ptrw(); + copymem(w, p_indices.ptr(), sizeof(int32_t) * p_indices.size()); + } + pb.index_buffer = RD::get_singleton()->index_buffer_create(p_indices.size(), RD::INDEX_BUFFER_FORMAT_UINT32, index_buffer); + pb.indices = RD::get_singleton()->index_array_create(pb.index_buffer, 0, p_indices.size()); + } + + pb.vertex_format_id = vertex_id; + + PolygonID id = polygon_buffers.last_id++; + + polygon_buffers.polygons[id] = pb; + + return id; +} + +void RendererCanvasRenderRD::free_polygon(PolygonID p_polygon) { + PolygonBuffers *pb_ptr = polygon_buffers.polygons.getptr(p_polygon); + ERR_FAIL_COND(!pb_ptr); + + PolygonBuffers &pb = *pb_ptr; + + if (pb.indices.is_valid()) { + RD::get_singleton()->free(pb.indices); + } + if (pb.index_buffer.is_valid()) { + RD::get_singleton()->free(pb.index_buffer); + } + + RD::get_singleton()->free(pb.vertex_array); + RD::get_singleton()->free(pb.vertex_buffer); + + polygon_buffers.polygons.erase(p_polygon); +} + +//////////////////// + +void RendererCanvasRenderRD::_bind_canvas_texture(RD::DrawListID p_draw_list, RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, RID &r_last_texture, PushConstant &push_constant, Size2 &r_texpixel_size) { + if (p_texture == RID()) { + p_texture = default_canvas_texture; + } + + if (r_last_texture == p_texture) { + return; //nothing to do, its the same + } + + RID uniform_set; + Color specular_shininess; + Size2i size; + bool use_normal; + bool use_specular; + + bool success = storage->canvas_texture_get_uniform_set(p_texture, p_base_filter, p_base_repeat, shader.default_version_rd_shader, CANVAS_TEXTURE_UNIFORM_SET, uniform_set, size, specular_shininess, use_normal, use_specular); + //something odd happened + if (!success) { + _bind_canvas_texture(p_draw_list, default_canvas_texture, p_base_filter, p_base_repeat, r_last_texture, push_constant, r_texpixel_size); + return; + } + + RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, uniform_set, CANVAS_TEXTURE_UNIFORM_SET); + + if (specular_shininess.a < 0.999) { + push_constant.flags |= FLAGS_DEFAULT_SPECULAR_MAP_USED; + } else { + push_constant.flags &= ~FLAGS_DEFAULT_SPECULAR_MAP_USED; + } + + if (use_normal) { + push_constant.flags |= FLAGS_DEFAULT_NORMAL_MAP_USED; + } else { + push_constant.flags &= ~FLAGS_DEFAULT_NORMAL_MAP_USED; + } + + push_constant.specular_shininess = uint32_t(CLAMP(specular_shininess.a * 255.0, 0, 255)) << 24; + push_constant.specular_shininess |= uint32_t(CLAMP(specular_shininess.b * 255.0, 0, 255)) << 16; + push_constant.specular_shininess |= uint32_t(CLAMP(specular_shininess.g * 255.0, 0, 255)) << 8; + push_constant.specular_shininess |= uint32_t(CLAMP(specular_shininess.r * 255.0, 0, 255)); + + r_texpixel_size.x = 1.0 / float(size.x); + r_texpixel_size.y = 1.0 / float(size.y); + + push_constant.color_texture_pixel_size[0] = r_texpixel_size.x; + push_constant.color_texture_pixel_size[1] = r_texpixel_size.y; + + r_last_texture = p_texture; +} + +void RendererCanvasRenderRD::_render_item(RD::DrawListID p_draw_list, const Item *p_item, RD::FramebufferFormatID p_framebuffer_format, const Transform2D &p_canvas_transform_inverse, Item *¤t_clip, Light *p_lights, PipelineVariants *p_pipeline_variants) { + //create an empty push constant + + RS::CanvasItemTextureFilter current_filter = default_filter; + RS::CanvasItemTextureRepeat current_repeat = default_repeat; + + if (p_item->texture_filter != RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT) { + current_filter = p_item->texture_filter; + } + + if (p_item->texture_repeat != RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT) { + current_repeat = p_item->texture_repeat; + } + + PushConstant push_constant; + Transform2D base_transform = p_canvas_transform_inverse * p_item->final_transform; + _update_transform_2d_to_mat2x3(base_transform, push_constant.world); + + Color base_color = p_item->final_modulate; + + for (int i = 0; i < 4; i++) { + push_constant.modulation[i] = 0; + push_constant.ninepatch_margins[i] = 0; + push_constant.src_rect[i] = 0; + push_constant.dst_rect[i] = 0; + } + push_constant.flags = 0; + push_constant.color_texture_pixel_size[0] = 0; + push_constant.color_texture_pixel_size[1] = 0; + + push_constant.pad[0] = 0; + push_constant.pad[1] = 0; + + push_constant.lights[0] = 0; + push_constant.lights[1] = 0; + push_constant.lights[2] = 0; + push_constant.lights[3] = 0; + + uint32_t base_flags = 0; + + uint16_t light_count = 0; + PipelineLightMode light_mode; + + { + Light *light = p_lights; + + while (light) { + if (light->render_index_cache >= 0 && p_item->light_mask & light->item_mask && p_item->z_final >= light->z_min && p_item->z_final <= light->z_max && p_item->global_rect_cache.intersects_transformed(light->xform_cache, light->rect_cache)) { + uint32_t light_index = light->render_index_cache; + push_constant.lights[light_count >> 2] |= light_index << ((light_count & 3) * 8); + + light_count++; + + if (light_count == MAX_LIGHTS_PER_ITEM) { + break; + } + } + light = light->next_ptr; + } + + base_flags |= light_count << FLAGS_LIGHT_COUNT_SHIFT; + } + + light_mode = (light_count > 0 || using_directional_lights) ? PIPELINE_LIGHT_MODE_ENABLED : PIPELINE_LIGHT_MODE_DISABLED; + + PipelineVariants *pipeline_variants = p_pipeline_variants; + + bool reclip = false; + + RID last_texture; + Size2 texpixel_size; + + const Item::Command *c = p_item->commands; + while (c) { + push_constant.flags = base_flags | (push_constant.flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); //reset on each command for sanity, keep canvastexture binding config + + switch (c->type) { + case Item::Command::TYPE_RECT: { + const Item::CommandRect *rect = static_cast<const Item::CommandRect *>(c); + + //bind pipeline + { + RID pipeline = pipeline_variants->variants[light_mode][PIPELINE_VARIANT_QUAD].get_render_pipeline(RD::INVALID_ID, p_framebuffer_format); + RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); + } + + //bind textures + + _bind_canvas_texture(p_draw_list, rect->texture, current_filter, current_repeat, last_texture, push_constant, texpixel_size); + + Rect2 src_rect; + Rect2 dst_rect; + + if (rect->texture != RID()) { + src_rect = (rect->flags & CANVAS_RECT_REGION) ? Rect2(rect->source.position * texpixel_size, rect->source.size * texpixel_size) : Rect2(0, 0, 1, 1); + dst_rect = Rect2(rect->rect.position, rect->rect.size); + + if (dst_rect.size.width < 0) { + dst_rect.position.x += dst_rect.size.width; + dst_rect.size.width *= -1; + } + if (dst_rect.size.height < 0) { + dst_rect.position.y += dst_rect.size.height; + dst_rect.size.height *= -1; + } + + if (rect->flags & CANVAS_RECT_FLIP_H) { + src_rect.size.x *= -1; + } + + if (rect->flags & CANVAS_RECT_FLIP_V) { + src_rect.size.y *= -1; + } + + if (rect->flags & CANVAS_RECT_TRANSPOSE) { + dst_rect.size.x *= -1; // Encoding in the dst_rect.z uniform + } + + if (rect->flags & CANVAS_RECT_CLIP_UV) { + push_constant.flags |= FLAGS_CLIP_RECT_UV; + } + + } else { + dst_rect = Rect2(rect->rect.position, rect->rect.size); + + if (dst_rect.size.width < 0) { + dst_rect.position.x += dst_rect.size.width; + dst_rect.size.width *= -1; + } + if (dst_rect.size.height < 0) { + dst_rect.position.y += dst_rect.size.height; + dst_rect.size.height *= -1; + } + + src_rect = Rect2(0, 0, 1, 1); + } + + push_constant.modulation[0] = rect->modulate.r * base_color.r; + push_constant.modulation[1] = rect->modulate.g * base_color.g; + push_constant.modulation[2] = rect->modulate.b * base_color.b; + push_constant.modulation[3] = rect->modulate.a * base_color.a; + + push_constant.src_rect[0] = src_rect.position.x; + push_constant.src_rect[1] = src_rect.position.y; + push_constant.src_rect[2] = src_rect.size.width; + push_constant.src_rect[3] = src_rect.size.height; + + push_constant.dst_rect[0] = dst_rect.position.x; + push_constant.dst_rect[1] = dst_rect.position.y; + push_constant.dst_rect[2] = dst_rect.size.width; + push_constant.dst_rect[3] = dst_rect.size.height; + + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_bind_index_array(p_draw_list, shader.quad_index_array); + RD::get_singleton()->draw_list_draw(p_draw_list, true); + + } break; + + case Item::Command::TYPE_NINEPATCH: { + const Item::CommandNinePatch *np = static_cast<const Item::CommandNinePatch *>(c); + + //bind pipeline + { + RID pipeline = pipeline_variants->variants[light_mode][PIPELINE_VARIANT_NINEPATCH].get_render_pipeline(RD::INVALID_ID, p_framebuffer_format); + RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); + } + + //bind textures + + _bind_canvas_texture(p_draw_list, np->texture, current_filter, current_repeat, last_texture, push_constant, texpixel_size); + + Rect2 src_rect; + Rect2 dst_rect(np->rect.position.x, np->rect.position.y, np->rect.size.x, np->rect.size.y); + + if (np->texture == RID()) { + texpixel_size = Size2(1, 1); + src_rect = Rect2(0, 0, 1, 1); + + } else { + if (np->source != Rect2()) { + src_rect = Rect2(np->source.position.x * texpixel_size.width, np->source.position.y * texpixel_size.height, np->source.size.x * texpixel_size.width, np->source.size.y * texpixel_size.height); + push_constant.color_texture_pixel_size[0] = 1.0 / np->source.size.width; + push_constant.color_texture_pixel_size[1] = 1.0 / np->source.size.height; + + } else { + src_rect = Rect2(0, 0, 1, 1); + } + } + + push_constant.modulation[0] = np->color.r * base_color.r; + push_constant.modulation[1] = np->color.g * base_color.g; + push_constant.modulation[2] = np->color.b * base_color.b; + push_constant.modulation[3] = np->color.a * base_color.a; + + push_constant.src_rect[0] = src_rect.position.x; + push_constant.src_rect[1] = src_rect.position.y; + push_constant.src_rect[2] = src_rect.size.width; + push_constant.src_rect[3] = src_rect.size.height; + + push_constant.dst_rect[0] = dst_rect.position.x; + push_constant.dst_rect[1] = dst_rect.position.y; + push_constant.dst_rect[2] = dst_rect.size.width; + push_constant.dst_rect[3] = dst_rect.size.height; + + push_constant.flags |= int(np->axis_x) << FLAGS_NINEPATCH_H_MODE_SHIFT; + push_constant.flags |= int(np->axis_y) << FLAGS_NINEPATCH_V_MODE_SHIFT; + + if (np->draw_center) { + push_constant.flags |= FLAGS_NINEPACH_DRAW_CENTER; + } + + push_constant.ninepatch_margins[0] = np->margin[SIDE_LEFT]; + push_constant.ninepatch_margins[1] = np->margin[SIDE_TOP]; + push_constant.ninepatch_margins[2] = np->margin[SIDE_RIGHT]; + push_constant.ninepatch_margins[3] = np->margin[SIDE_BOTTOM]; + + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_bind_index_array(p_draw_list, shader.quad_index_array); + RD::get_singleton()->draw_list_draw(p_draw_list, true); + + //restore if overrided + push_constant.color_texture_pixel_size[0] = texpixel_size.x; + push_constant.color_texture_pixel_size[1] = texpixel_size.y; + + } break; + case Item::Command::TYPE_POLYGON: { + const Item::CommandPolygon *polygon = static_cast<const Item::CommandPolygon *>(c); + + PolygonBuffers *pb = polygon_buffers.polygons.getptr(polygon->polygon.polygon_id); + ERR_CONTINUE(!pb); + //bind pipeline + { + static const PipelineVariant variant[RS::PRIMITIVE_MAX] = { PIPELINE_VARIANT_ATTRIBUTE_POINTS, PIPELINE_VARIANT_ATTRIBUTE_LINES, PIPELINE_VARIANT_ATTRIBUTE_LINES_STRIP, PIPELINE_VARIANT_ATTRIBUTE_TRIANGLES, PIPELINE_VARIANT_ATTRIBUTE_TRIANGLE_STRIP }; + ERR_CONTINUE(polygon->primitive < 0 || polygon->primitive >= RS::PRIMITIVE_MAX); + RID pipeline = pipeline_variants->variants[light_mode][variant[polygon->primitive]].get_render_pipeline(pb->vertex_format_id, p_framebuffer_format); + RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); + } + + if (polygon->primitive == RS::PRIMITIVE_LINES) { + //not supported in most hardware, so pointless + //RD::get_singleton()->draw_list_set_line_width(p_draw_list, polygon->line_width); + } + + //bind textures + + _bind_canvas_texture(p_draw_list, polygon->texture, current_filter, current_repeat, last_texture, push_constant, texpixel_size); + + push_constant.modulation[0] = base_color.r; + push_constant.modulation[1] = base_color.g; + push_constant.modulation[2] = base_color.b; + push_constant.modulation[3] = base_color.a; + + for (int j = 0; j < 4; j++) { + push_constant.src_rect[j] = 0; + push_constant.dst_rect[j] = 0; + push_constant.ninepatch_margins[j] = 0; + } + + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_bind_vertex_array(p_draw_list, pb->vertex_array); + if (pb->indices.is_valid()) { + RD::get_singleton()->draw_list_bind_index_array(p_draw_list, pb->indices); + } + RD::get_singleton()->draw_list_draw(p_draw_list, pb->indices.is_valid()); + + } break; + case Item::Command::TYPE_PRIMITIVE: { + const Item::CommandPrimitive *primitive = static_cast<const Item::CommandPrimitive *>(c); + + //bind pipeline + { + static const PipelineVariant variant[4] = { PIPELINE_VARIANT_PRIMITIVE_POINTS, PIPELINE_VARIANT_PRIMITIVE_LINES, PIPELINE_VARIANT_PRIMITIVE_TRIANGLES, PIPELINE_VARIANT_PRIMITIVE_TRIANGLES }; + ERR_CONTINUE(primitive->point_count == 0 || primitive->point_count > 4); + RID pipeline = pipeline_variants->variants[light_mode][variant[primitive->point_count - 1]].get_render_pipeline(RD::INVALID_ID, p_framebuffer_format); + RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); + } + + //bind textures + + _bind_canvas_texture(p_draw_list, RID(), current_filter, current_repeat, last_texture, push_constant, texpixel_size); + + RD::get_singleton()->draw_list_bind_index_array(p_draw_list, primitive_arrays.index_array[MIN(3, primitive->point_count) - 1]); + + for (uint32_t j = 0; j < MIN(3, primitive->point_count); j++) { + push_constant.points[j * 2 + 0] = primitive->points[j].x; + push_constant.points[j * 2 + 1] = primitive->points[j].y; + push_constant.uvs[j * 2 + 0] = primitive->uvs[j].x; + push_constant.uvs[j * 2 + 1] = primitive->uvs[j].y; + Color col = primitive->colors[j] * base_color; + push_constant.colors[j * 2 + 0] = (uint32_t(Math::make_half_float(col.g)) << 16) | Math::make_half_float(col.r); + push_constant.colors[j * 2 + 1] = (uint32_t(Math::make_half_float(col.a)) << 16) | Math::make_half_float(col.b); + } + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_draw(p_draw_list, true); + + if (primitive->point_count == 4) { + for (uint32_t j = 1; j < 3; j++) { + //second half of triangle + push_constant.points[j * 2 + 0] = primitive->points[j + 1].x; + push_constant.points[j * 2 + 1] = primitive->points[j + 1].y; + push_constant.uvs[j * 2 + 0] = primitive->uvs[j + 1].x; + push_constant.uvs[j * 2 + 1] = primitive->uvs[j + 1].y; + Color col = primitive->colors[j + 1] * base_color; + push_constant.colors[j * 2 + 0] = (uint32_t(Math::make_half_float(col.g)) << 16) | Math::make_half_float(col.r); + push_constant.colors[j * 2 + 1] = (uint32_t(Math::make_half_float(col.a)) << 16) | Math::make_half_float(col.b); + } + + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_draw(p_draw_list, true); + } + + } break; + case Item::Command::TYPE_MESH: + case Item::Command::TYPE_MULTIMESH: + case Item::Command::TYPE_PARTICLES: { + ERR_PRINT("FIXME: Mesh, MultiMesh and Particles render commands are unimplemented currently, they need to be ported to the 4.0 rendering architecture."); +#ifndef _MSC_VER +#warning Item::Command types for Mesh, MultiMesh and Particles need to be implemented. +#endif + // See #if 0'ed code below to port from GLES3. + } break; + +#if 0 + case Item::Command::TYPE_MESH: { + Item::CommandMesh *mesh = static_cast<Item::CommandMesh *>(c); + _set_texture_rect_mode(false); + + RasterizerStorageGLES3::Texture *texture = _bind_canvas_texture(mesh->texture, mesh->normal_map); + + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE, texpixel_size); + } + + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX, state.final_transform * mesh->transform); + + RasterizerStorageGLES3::Mesh *mesh_data = storage->mesh_owner.getornull(mesh->mesh); + if (mesh_data) { + for (int j = 0; j < mesh_data->surfaces.size(); j++) { + RasterizerStorageGLES3::Surface *s = mesh_data->surfaces[j]; + // materials are ignored in 2D meshes, could be added but many things (ie, lighting mode, reading from screen, etc) would break as they are not meant be set up at this point of drawing + glBindVertexArray(s->array_id); + + glVertexAttrib4f(RS::ARRAY_COLOR, mesh->modulate.r, mesh->modulate.g, mesh->modulate.b, mesh->modulate.a); + + if (s->index_array_len) { + glDrawElements(gl_primitive[s->primitive], s->index_array_len, (s->array_len >= (1 << 16)) ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT, 0); + } else { + glDrawArrays(gl_primitive[s->primitive], 0, s->array_len); + } + + glBindVertexArray(0); + } + } + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX, state.final_transform); + + } break; + case Item::Command::TYPE_MULTIMESH: { + Item::CommandMultiMesh *mmesh = static_cast<Item::CommandMultiMesh *>(c); + + RasterizerStorageGLES3::MultiMesh *multi_mesh = storage->multimesh_owner.getornull(mmesh->multimesh); + + if (!multi_mesh) + break; + + RasterizerStorageGLES3::Mesh *mesh_data = storage->mesh_owner.getornull(multi_mesh->mesh); + + if (!mesh_data) + break; + + RasterizerStorageGLES3::Texture *texture = _bind_canvas_texture(mmesh->texture, mmesh->normal_map); + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCE_CUSTOM, multi_mesh->custom_data_format != RS::MULTIMESH_CUSTOM_DATA_NONE); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCING, true); + //reset shader and force rebind + state.using_texture_rect = true; + _set_texture_rect_mode(false); + + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE, texpixel_size); + } + + int amount = MIN(multi_mesh->size, multi_mesh->visible_instances); + + if (amount == -1) { + amount = multi_mesh->size; + } + + for (int j = 0; j < mesh_data->surfaces.size(); j++) { + RasterizerStorageGLES3::Surface *s = mesh_data->surfaces[j]; + // materials are ignored in 2D meshes, could be added but many things (ie, lighting mode, reading from screen, etc) would break as they are not meant be set up at this point of drawing + glBindVertexArray(s->instancing_array_id); + + glBindBuffer(GL_ARRAY_BUFFER, multi_mesh->buffer); //modify the buffer + + int stride = (multi_mesh->xform_floats + multi_mesh->color_floats + multi_mesh->custom_data_floats) * 4; + glEnableVertexAttribArray(8); + glVertexAttribPointer(8, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(0)); + glVertexAttribDivisor(8, 1); + glEnableVertexAttribArray(9); + glVertexAttribPointer(9, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(4 * 4)); + glVertexAttribDivisor(9, 1); + + int color_ofs; + + if (multi_mesh->transform_format == RS::MULTIMESH_TRANSFORM_3D) { + glEnableVertexAttribArray(10); + glVertexAttribPointer(10, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(8 * 4)); + glVertexAttribDivisor(10, 1); + color_ofs = 12 * 4; + } else { + glDisableVertexAttribArray(10); + glVertexAttrib4f(10, 0, 0, 1, 0); + color_ofs = 8 * 4; + } + + int custom_data_ofs = color_ofs; + + switch (multi_mesh->color_format) { + case RS::MULTIMESH_COLOR_NONE: { + glDisableVertexAttribArray(11); + glVertexAttrib4f(11, 1, 1, 1, 1); + } break; + case RS::MULTIMESH_COLOR_8BIT: { + glEnableVertexAttribArray(11); + glVertexAttribPointer(11, 4, GL_UNSIGNED_BYTE, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(color_ofs)); + glVertexAttribDivisor(11, 1); + custom_data_ofs += 4; + + } break; + case RS::MULTIMESH_COLOR_FLOAT: { + glEnableVertexAttribArray(11); + glVertexAttribPointer(11, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(color_ofs)); + glVertexAttribDivisor(11, 1); + custom_data_ofs += 4 * 4; + } break; + } + + switch (multi_mesh->custom_data_format) { + case RS::MULTIMESH_CUSTOM_DATA_NONE: { + glDisableVertexAttribArray(12); + glVertexAttrib4f(12, 1, 1, 1, 1); + } break; + case RS::MULTIMESH_CUSTOM_DATA_8BIT: { + glEnableVertexAttribArray(12); + glVertexAttribPointer(12, 4, GL_UNSIGNED_BYTE, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(custom_data_ofs)); + glVertexAttribDivisor(12, 1); + + } break; + case RS::MULTIMESH_CUSTOM_DATA_FLOAT: { + glEnableVertexAttribArray(12); + glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(custom_data_ofs)); + glVertexAttribDivisor(12, 1); + } break; + } + + if (s->index_array_len) { + glDrawElementsInstanced(gl_primitive[s->primitive], s->index_array_len, (s->array_len >= (1 << 16)) ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT, 0, amount); + } else { + glDrawArraysInstanced(gl_primitive[s->primitive], 0, s->array_len, amount); + } + + glBindVertexArray(0); + } + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCE_CUSTOM, false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCING, false); + state.using_texture_rect = true; + _set_texture_rect_mode(false); + + } break; + case Item::Command::TYPE_PARTICLES: { + Item::CommandParticles *particles_cmd = static_cast<Item::CommandParticles *>(c); + + RasterizerStorageGLES3::Particles *particles = storage->particles_owner.getornull(particles_cmd->particles); + if (!particles) + break; + + if (particles->inactive && !particles->emitting) + break; + + glVertexAttrib4f(RS::ARRAY_COLOR, 1, 1, 1, 1); //not used, so keep white + + RenderingServerDefault::redraw_request(); + + storage->particles_request_process(particles_cmd->particles); + //enable instancing + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCE_CUSTOM, true); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_PARTICLES, true); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCING, true); + //reset shader and force rebind + state.using_texture_rect = true; + _set_texture_rect_mode(false); + + RasterizerStorageGLES3::Texture *texture = _bind_canvas_texture(particles_cmd->texture, particles_cmd->normal_map); + + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE, texpixel_size); + } else { + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE, Vector2(1.0, 1.0)); + } + + if (!particles->use_local_coords) { + Transform2D inv_xf; + inv_xf.set_axis(0, Vector2(particles->emission_transform.basis.get_axis(0).x, particles->emission_transform.basis.get_axis(0).y)); + inv_xf.set_axis(1, Vector2(particles->emission_transform.basis.get_axis(1).x, particles->emission_transform.basis.get_axis(1).y)); + inv_xf.set_origin(Vector2(particles->emission_transform.get_origin().x, particles->emission_transform.get_origin().y)); + inv_xf.affine_invert(); + + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX, state.final_transform * inv_xf); + } + + glBindVertexArray(data.particle_quad_array); //use particle quad array + glBindBuffer(GL_ARRAY_BUFFER, particles->particle_buffers[0]); //bind particle buffer + + int stride = sizeof(float) * 4 * 6; + + int amount = particles->amount; + + if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_LIFETIME) { + glEnableVertexAttribArray(8); //xform x + glVertexAttribPointer(8, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 3)); + glVertexAttribDivisor(8, 1); + glEnableVertexAttribArray(9); //xform y + glVertexAttribPointer(9, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 4)); + glVertexAttribDivisor(9, 1); + glEnableVertexAttribArray(10); //xform z + glVertexAttribPointer(10, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 5)); + glVertexAttribDivisor(10, 1); + glEnableVertexAttribArray(11); //color + glVertexAttribPointer(11, 4, GL_FLOAT, GL_FALSE, stride, nullptr); + glVertexAttribDivisor(11, 1); + glEnableVertexAttribArray(12); //custom + glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 2)); + glVertexAttribDivisor(12, 1); + + glDrawArraysInstanced(GL_TRIANGLE_FAN, 0, 4, amount); + } else { + //split + int split = int(Math::ceil(particles->phase * particles->amount)); + + if (amount - split > 0) { + glEnableVertexAttribArray(8); //xform x + glVertexAttribPointer(8, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * split + sizeof(float) * 4 * 3)); + glVertexAttribDivisor(8, 1); + glEnableVertexAttribArray(9); //xform y + glVertexAttribPointer(9, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * split + sizeof(float) * 4 * 4)); + glVertexAttribDivisor(9, 1); + glEnableVertexAttribArray(10); //xform z + glVertexAttribPointer(10, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * split + sizeof(float) * 4 * 5)); + glVertexAttribDivisor(10, 1); + glEnableVertexAttribArray(11); //color + glVertexAttribPointer(11, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * split + 0)); + glVertexAttribDivisor(11, 1); + glEnableVertexAttribArray(12); //custom + glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * split + sizeof(float) * 4 * 2)); + glVertexAttribDivisor(12, 1); + + glDrawArraysInstanced(GL_TRIANGLE_FAN, 0, 4, amount - split); + } + + if (split > 0) { + glEnableVertexAttribArray(8); //xform x + glVertexAttribPointer(8, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 3)); + glVertexAttribDivisor(8, 1); + glEnableVertexAttribArray(9); //xform y + glVertexAttribPointer(9, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 4)); + glVertexAttribDivisor(9, 1); + glEnableVertexAttribArray(10); //xform z + glVertexAttribPointer(10, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 5)); + glVertexAttribDivisor(10, 1); + glEnableVertexAttribArray(11); //color + glVertexAttribPointer(11, 4, GL_FLOAT, GL_FALSE, stride, nullptr); + glVertexAttribDivisor(11, 1); + glEnableVertexAttribArray(12); //custom + glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * 2)); + glVertexAttribDivisor(12, 1); + + glDrawArraysInstanced(GL_TRIANGLE_FAN, 0, 4, split); + } + } + + glBindVertexArray(0); + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCE_CUSTOM, false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_PARTICLES, false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_INSTANCING, false); + state.using_texture_rect = true; + _set_texture_rect_mode(false); + + } break; +#endif + case Item::Command::TYPE_TRANSFORM: { + const Item::CommandTransform *transform = static_cast<const Item::CommandTransform *>(c); + _update_transform_2d_to_mat2x3(base_transform * transform->xform, push_constant.world); + + } break; + case Item::Command::TYPE_CLIP_IGNORE: { + const Item::CommandClipIgnore *ci = static_cast<const Item::CommandClipIgnore *>(c); + if (current_clip) { + if (ci->ignore != reclip) { + if (ci->ignore) { + RD::get_singleton()->draw_list_disable_scissor(p_draw_list); + reclip = true; + } else { + RD::get_singleton()->draw_list_enable_scissor(p_draw_list, current_clip->final_clip_rect); + reclip = false; + } + } + } + + } break; + } + + c = c->next; + } + + if (current_clip && reclip) { + //will make it re-enable clipping if needed afterwards + current_clip = nullptr; + } +} + +RID RendererCanvasRenderRD::_create_base_uniform_set(RID p_to_render_target, bool p_backbuffer) { + //re create canvas state + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 1; + u.ids.push_back(state.canvas_state_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 2; + u.ids.push_back(state.lights_uniform_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 3; + u.ids.push_back(storage->decal_atlas_get_texture()); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 4; + u.ids.push_back(state.shadow_texture); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 5; + u.ids.push_back(state.shadow_sampler); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 6; + RID screen; + if (p_backbuffer) { + screen = storage->render_target_get_rd_texture(p_to_render_target); + } else { + screen = storage->render_target_get_rd_backbuffer(p_to_render_target); + if (screen.is_null()) { //unallocated backbuffer + screen = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + } + } + u.ids.push_back(screen); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 7; + RID sdf = storage->render_target_get_sdf_texture(p_to_render_target); + u.ids.push_back(sdf); + uniforms.push_back(u); + } + + { + //needs samplers for the material (uses custom textures) create them + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 8; + u.ids.resize(12); + RID *ids_ptr = u.ids.ptrw(); + ids_ptr[0] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[1] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[2] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[3] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[4] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[5] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[6] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[7] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[8] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[9] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[10] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[11] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 9; + u.ids.push_back(storage->global_variables_get_storage_buffer()); + uniforms.push_back(u); + } + + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shader.default_version_rd_shader, BASE_UNIFORM_SET); + if (p_backbuffer) { + storage->render_target_set_backbuffer_uniform_set(p_to_render_target, uniform_set); + } else { + storage->render_target_set_framebuffer_uniform_set(p_to_render_target, uniform_set); + } + + return uniform_set; +} + +void RendererCanvasRenderRD::_render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool p_to_backbuffer) { + Item *current_clip = nullptr; + + Transform2D canvas_transform_inverse = p_canvas_transform_inverse; + + RID framebuffer; + RID fb_uniform_set; + bool clear = false; + Vector<Color> clear_colors; + + if (p_to_backbuffer) { + framebuffer = storage->render_target_get_rd_backbuffer_framebuffer(p_to_render_target); + fb_uniform_set = storage->render_target_get_backbuffer_uniform_set(p_to_render_target); + } else { + framebuffer = storage->render_target_get_rd_framebuffer(p_to_render_target); + + if (storage->render_target_is_clear_requested(p_to_render_target)) { + clear = true; + clear_colors.push_back(storage->render_target_get_clear_request_color(p_to_render_target)); + storage->render_target_disable_clear_request(p_to_render_target); + } +#ifndef _MSC_VER +#warning TODO obtain from framebuffer format eventually when this is implemented +#endif + + fb_uniform_set = storage->render_target_get_framebuffer_uniform_set(p_to_render_target); + } + + if (fb_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(fb_uniform_set)) { + fb_uniform_set = _create_base_uniform_set(p_to_render_target, p_to_backbuffer); + } + + RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(framebuffer); + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, clear ? RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, clear_colors); + + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, fb_uniform_set, BASE_UNIFORM_SET); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, state.default_transforms_uniform_set, TRANSFORMS_UNIFORM_SET); + + RID prev_material; + + PipelineVariants *pipeline_variants = &shader.pipeline_variants; + + for (int i = 0; i < p_item_count; i++) { + Item *ci = items[i]; + + if (current_clip != ci->final_clip_owner) { + current_clip = ci->final_clip_owner; + + //setup clip + if (current_clip) { + RD::get_singleton()->draw_list_enable_scissor(draw_list, current_clip->final_clip_rect); + + } else { + RD::get_singleton()->draw_list_disable_scissor(draw_list); + } + } + + RID material = ci->material; + + if (material.is_null() && ci->canvas_group != nullptr) { + material = default_canvas_group_material; + } + + if (material != prev_material) { + MaterialData *material_data = nullptr; + if (material.is_valid()) { + material_data = (MaterialData *)storage->material_get_data(material, RendererStorageRD::SHADER_TYPE_2D); + } + + if (material_data) { + if (material_data->shader_data->version.is_valid() && material_data->shader_data->valid) { + pipeline_variants = &material_data->shader_data->pipeline_variants; + if (material_data->uniform_set.is_valid()) { + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, material_data->uniform_set, MATERIAL_UNIFORM_SET); + } + } else { + pipeline_variants = &shader.pipeline_variants; + } + } else { + pipeline_variants = &shader.pipeline_variants; + } + } + + _render_item(draw_list, ci, fb_format, canvas_transform_inverse, current_clip, p_lights, pipeline_variants); + + prev_material = material; + } + + RD::get_singleton()->draw_list_end(); +} + +void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_light_list, const Transform2D &p_canvas_transform, RenderingServer::CanvasItemTextureFilter p_default_filter, RenderingServer::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) { + r_sdf_used = false; + int item_count = 0; + + //setup canvas state uniforms if needed + + Transform2D canvas_transform_inverse = p_canvas_transform.affine_inverse(); + + //setup directional lights if exist + + uint32_t light_count = 0; + uint32_t directional_light_count = 0; + { + Light *l = p_directional_light_list; + uint32_t index = 0; + + while (l) { + if (index == state.max_lights_per_render) { + l->render_index_cache = -1; + l = l->next_ptr; + continue; + } + + CanvasLight *clight = canvas_light_owner.getornull(l->light_internal); + if (!clight) { //unused or invalid texture + l->render_index_cache = -1; + l = l->next_ptr; + ERR_CONTINUE(!clight); + } + + Vector2 canvas_light_dir = l->xform_cache.elements[1].normalized(); + + state.light_uniforms[index].position[0] = -canvas_light_dir.x; + state.light_uniforms[index].position[1] = -canvas_light_dir.y; + + _update_transform_2d_to_mat2x4(clight->shadow.directional_xform, state.light_uniforms[index].shadow_matrix); + + state.light_uniforms[index].height = l->height; //0..1 here + + for (int i = 0; i < 4; i++) { + state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255)); + state.light_uniforms[index].color[i] = l->color[i]; + } + + state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate + + if (state.shadow_fb.is_valid()) { + state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth); + state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far; + state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset; + } else { + state.light_uniforms[index].shadow_pixel_size = 1.0; + state.light_uniforms[index].shadow_z_far_inv = 1.0; + state.light_uniforms[index].shadow_y_ofs = 0; + } + + state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT; + state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT; + if (clight->shadow.enabled) { + state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW; + } + + l->render_index_cache = index; + + index++; + l = l->next_ptr; + } + + light_count = index; + directional_light_count = light_count; + using_directional_lights = directional_light_count > 0; + } + + //setup lights if exist + + { + Light *l = p_light_list; + uint32_t index = light_count; + + while (l) { + if (index == state.max_lights_per_render) { + l->render_index_cache = -1; + l = l->next_ptr; + continue; + } + + CanvasLight *clight = canvas_light_owner.getornull(l->light_internal); + if (!clight) { //unused or invalid texture + l->render_index_cache = -1; + l = l->next_ptr; + ERR_CONTINUE(!clight); + } + Transform2D to_light_xform = (p_canvas_transform * l->light_shader_xform).affine_inverse(); + + Vector2 canvas_light_pos = p_canvas_transform.xform(l->xform.get_origin()); //convert light position to canvas coordinates, as all computation is done in canvas coords to avoid precision loss + state.light_uniforms[index].position[0] = canvas_light_pos.x; + state.light_uniforms[index].position[1] = canvas_light_pos.y; + + _update_transform_2d_to_mat2x4(to_light_xform, state.light_uniforms[index].matrix); + _update_transform_2d_to_mat2x4(l->xform_cache.affine_inverse(), state.light_uniforms[index].shadow_matrix); + + state.light_uniforms[index].height = l->height * (p_canvas_transform.elements[0].length() + p_canvas_transform.elements[1].length()) * 0.5; //approximate height conversion to the canvas size, since all calculations are done in canvas coords to avoid precision loss + for (int i = 0; i < 4; i++) { + state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255)); + state.light_uniforms[index].color[i] = l->color[i]; + } + + state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate + + if (state.shadow_fb.is_valid()) { + state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth); + state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far; + state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset; + } else { + state.light_uniforms[index].shadow_pixel_size = 1.0; + state.light_uniforms[index].shadow_z_far_inv = 1.0; + state.light_uniforms[index].shadow_y_ofs = 0; + } + + state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT; + state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT; + if (clight->shadow.enabled) { + state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW; + } + + if (clight->texture.is_valid()) { + Rect2 atlas_rect = storage->decal_atlas_get_texture_rect(clight->texture); + state.light_uniforms[index].atlas_rect[0] = atlas_rect.position.x; + state.light_uniforms[index].atlas_rect[1] = atlas_rect.position.y; + state.light_uniforms[index].atlas_rect[2] = atlas_rect.size.width; + state.light_uniforms[index].atlas_rect[3] = atlas_rect.size.height; + + } else { + state.light_uniforms[index].atlas_rect[0] = 0; + state.light_uniforms[index].atlas_rect[1] = 0; + state.light_uniforms[index].atlas_rect[2] = 0; + state.light_uniforms[index].atlas_rect[3] = 0; + } + + l->render_index_cache = index; + + index++; + l = l->next_ptr; + } + + light_count = index; + } + + if (light_count > 0) { + RD::get_singleton()->buffer_update(state.lights_uniform_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0]); + } + + { + //update canvas state uniform buffer + State::Buffer state_buffer; + + Size2i ssize = storage->render_target_get_size(p_to_render_target); + + Transform screen_transform; + screen_transform.translate(-(ssize.width / 2.0f), -(ssize.height / 2.0f), 0.0f); + screen_transform.scale(Vector3(2.0f / ssize.width, 2.0f / ssize.height, 1.0f)); + _update_transform_to_mat4(screen_transform, state_buffer.screen_transform); + _update_transform_2d_to_mat4(p_canvas_transform, state_buffer.canvas_transform); + + Transform2D normal_transform = p_canvas_transform; + normal_transform.elements[0].normalize(); + normal_transform.elements[1].normalize(); + normal_transform.elements[2] = Vector2(); + _update_transform_2d_to_mat4(normal_transform, state_buffer.canvas_normal_transform); + + state_buffer.canvas_modulate[0] = p_modulate.r; + state_buffer.canvas_modulate[1] = p_modulate.g; + state_buffer.canvas_modulate[2] = p_modulate.b; + state_buffer.canvas_modulate[3] = p_modulate.a; + + Size2 render_target_size = storage->render_target_get_size(p_to_render_target); + state_buffer.screen_pixel_size[0] = 1.0 / render_target_size.x; + state_buffer.screen_pixel_size[1] = 1.0 / render_target_size.y; + + state_buffer.time = state.time; + state_buffer.use_pixel_snap = p_snap_2d_vertices_to_pixel; + + state_buffer.directional_light_count = directional_light_count; + + Vector2 canvas_scale = p_canvas_transform.get_scale(); + + state_buffer.sdf_to_screen[0] = render_target_size.width / canvas_scale.x; + state_buffer.sdf_to_screen[1] = render_target_size.height / canvas_scale.y; + + state_buffer.screen_to_sdf[0] = 1.0 / state_buffer.sdf_to_screen[0]; + state_buffer.screen_to_sdf[1] = 1.0 / state_buffer.sdf_to_screen[1]; + + Rect2 sdf_rect = storage->render_target_get_sdf_rect(p_to_render_target); + Rect2 sdf_tex_rect(sdf_rect.position / canvas_scale, sdf_rect.size / canvas_scale); + + state_buffer.sdf_to_tex[0] = 1.0 / sdf_tex_rect.size.width; + state_buffer.sdf_to_tex[1] = 1.0 / sdf_tex_rect.size.height; + state_buffer.sdf_to_tex[2] = -sdf_tex_rect.position.x / sdf_tex_rect.size.width; + state_buffer.sdf_to_tex[3] = -sdf_tex_rect.position.y / sdf_tex_rect.size.height; + + //print_line("w: " + itos(ssize.width) + " s: " + rtos(canvas_scale)); + state_buffer.tex_to_sdf = 1.0 / ((canvas_scale.x + canvas_scale.y) * 0.5); + + RD::get_singleton()->buffer_update(state.canvas_state_buffer, 0, sizeof(State::Buffer), &state_buffer); + } + + { //default filter/repeat + default_filter = p_default_filter; + default_repeat = p_default_repeat; + } + + //fill the list until rendering is possible. + bool material_screen_texture_found = false; + Item *ci = p_item_list; + Rect2 back_buffer_rect; + bool backbuffer_copy = false; + + Item *canvas_group_owner = nullptr; + + while (ci) { + if (ci->copy_back_buffer && canvas_group_owner == nullptr) { + backbuffer_copy = true; + + if (ci->copy_back_buffer->full) { + back_buffer_rect = Rect2(); + } else { + back_buffer_rect = ci->copy_back_buffer->rect; + } + } + + if (ci->material.is_valid()) { + MaterialData *md = (MaterialData *)storage->material_get_data(ci->material, RendererStorageRD::SHADER_TYPE_2D); + if (md && md->shader_data->valid) { + if (md->shader_data->uses_screen_texture && canvas_group_owner == nullptr) { + if (!material_screen_texture_found) { + backbuffer_copy = true; + back_buffer_rect = Rect2(); + } + } + + if (md->shader_data->uses_sdf) { + r_sdf_used = true; + } + if (md->last_frame != RendererCompositorRD::singleton->get_frame_number()) { + md->last_frame = RendererCompositorRD::singleton->get_frame_number(); + if (!RD::get_singleton()->uniform_set_is_valid(md->uniform_set)) { + // uniform set may be gone because a dependency was erased. In this case, it will happen + // if a texture is deleted, so just re-create it. + storage->material_force_update_textures(ci->material, RendererStorageRD::SHADER_TYPE_2D); + } + } + } + } + + if (ci->canvas_group_owner != nullptr) { + if (canvas_group_owner == nullptr) { + //Canvas group begins here, render until before this item + _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list); + item_count = 0; + + Rect2i group_rect = ci->canvas_group_owner->global_rect_cache; + + if (ci->canvas_group_owner->canvas_group->mode == RS::CANVAS_GROUP_MODE_OPAQUE) { + storage->render_target_copy_to_back_buffer(p_to_render_target, group_rect, false); + } else { + storage->render_target_clear_back_buffer(p_to_render_target, group_rect, Color(0, 0, 0, 0)); + } + + backbuffer_copy = false; + canvas_group_owner = ci->canvas_group_owner; //continue until owner found + } + + ci->canvas_group_owner = nullptr; //must be cleared + } + + if (ci == canvas_group_owner) { + _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, true); + item_count = 0; + + if (ci->canvas_group->blur_mipmaps) { + storage->render_target_gen_back_buffer_mipmaps(p_to_render_target, ci->global_rect_cache); + } + + canvas_group_owner = nullptr; + } + + if (backbuffer_copy) { + //render anything pending, including clearing if no items + _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list); + item_count = 0; + + storage->render_target_copy_to_back_buffer(p_to_render_target, back_buffer_rect, true); + + backbuffer_copy = false; + material_screen_texture_found = true; //after a backbuffer copy, screen texture makes no further copies + } + + items[item_count++] = ci; + + if (!ci->next || item_count == MAX_RENDER_ITEMS - 1) { + _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list); + //then reset + item_count = 0; + } + + ci = ci->next; + } +} + +RID RendererCanvasRenderRD::light_create() { + CanvasLight canvas_light; + return canvas_light_owner.make_rid(canvas_light); +} + +void RendererCanvasRenderRD::light_set_texture(RID p_rid, RID p_texture) { + CanvasLight *cl = canvas_light_owner.getornull(p_rid); + ERR_FAIL_COND(!cl); + if (cl->texture == p_texture) { + return; + } + if (cl->texture.is_valid()) { + storage->texture_remove_from_decal_atlas(cl->texture); + } + cl->texture = p_texture; + + if (cl->texture.is_valid()) { + storage->texture_add_to_decal_atlas(cl->texture); + } +} + +void RendererCanvasRenderRD::light_set_use_shadow(RID p_rid, bool p_enable) { + CanvasLight *cl = canvas_light_owner.getornull(p_rid); + ERR_FAIL_COND(!cl); + + cl->shadow.enabled = p_enable; +} + +void RendererCanvasRenderRD::_update_shadow_atlas() { + if (state.shadow_fb == RID()) { + //ah, we lack the shadow texture.. + RD::get_singleton()->free(state.shadow_texture); //erase placeholder + + Vector<RID> fb_textures; + + { //texture + RD::TextureFormat tf; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.width = state.shadow_texture_size; + tf.height = state.max_lights_per_render * 2; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + + state.shadow_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + fb_textures.push_back(state.shadow_texture); + } + { + RD::TextureFormat tf; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.width = state.shadow_texture_size; + tf.height = state.max_lights_per_render * 2; + tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + tf.format = RD::DATA_FORMAT_D32_SFLOAT; + //chunks to write + state.shadow_depth_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + fb_textures.push_back(state.shadow_depth_texture); + } + + state.shadow_fb = RD::get_singleton()->framebuffer_create(fb_textures); + } +} +void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) { + CanvasLight *cl = canvas_light_owner.getornull(p_rid); + ERR_FAIL_COND(!cl->shadow.enabled); + + _update_shadow_atlas(); + + cl->shadow.z_far = p_far; + cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(state.max_lights_per_render * 2); + Vector<Color> cc; + cc.push_back(Color(p_far, p_far, p_far, 1.0)); + + for (int i = 0; i < 4; i++) { + //make sure it remains orthogonal, makes easy to read angle later + + //light.basis.scale(Vector3(to_light.elements[0].length(),to_light.elements[1].length(),1)); + + Rect2i rect((state.shadow_texture_size / 4) * i, p_shadow_index * 2, (state.shadow_texture_size / 4), 2); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, cc, 1.0, 0, rect); + + CameraMatrix projection; + { + real_t fov = 90; + real_t nearp = p_near; + real_t farp = p_far; + real_t aspect = 1.0; + + real_t ymax = nearp * Math::tan(Math::deg2rad(fov * 0.5)); + real_t ymin = -ymax; + real_t xmin = ymin * aspect; + real_t xmax = ymax * aspect; + + projection.set_frustum(xmin, xmax, ymin, ymax, nearp, farp); + } + + Vector3 cam_target = Basis(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0)); + projection = projection * CameraMatrix(Transform().looking_at(cam_target, Vector3(0, 0, -1)).affine_inverse()); + + ShadowRenderPushConstant push_constant; + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + push_constant.projection[y * 4 + x] = projection.matrix[y][x]; + } + } + static const Vector2 directions[4] = { Vector2(1, 0), Vector2(0, 1), Vector2(-1, 0), Vector2(0, -1) }; + push_constant.direction[0] = directions[i].x; + push_constant.direction[1] = directions[i].y; + push_constant.z_far = p_far; + push_constant.pad = 0; + + /*if (i == 0) + *p_xform_cache = projection;*/ + + LightOccluderInstance *instance = p_occluders; + + while (instance) { + OccluderPolygon *co = occluder_polygon_owner.getornull(instance->occluder); + + if (!co || co->index_array.is_null() || !(p_light_mask & instance->light_mask)) { + instance = instance->next; + continue; + } + + _update_transform_2d_to_mat2x4(p_light_xform * instance->xform_cache, push_constant.modelview); + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[co->cull_mode]); + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant)); + + RD::get_singleton()->draw_list_draw(draw_list, true); + + instance = instance->next; + } + + RD::get_singleton()->draw_list_end(); + } +} + +void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) { + CanvasLight *cl = canvas_light_owner.getornull(p_rid); + ERR_FAIL_COND(!cl->shadow.enabled); + + _update_shadow_atlas(); + + Vector2 light_dir = p_light_xform.elements[1].normalized(); + + Vector2 center = p_clip_rect.position + p_clip_rect.size * 0.5; + + float to_edge_distance = ABS(light_dir.dot(p_clip_rect.get_support(light_dir)) - light_dir.dot(center)); + + Vector2 from_pos = center - light_dir * (to_edge_distance + p_cull_distance); + float distance = to_edge_distance * 2.0 + p_cull_distance; + float half_size = p_clip_rect.size.length() * 0.5; //shadow length, must keep this no matter the angle + + cl->shadow.z_far = distance; + cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(state.max_lights_per_render * 2); + + Transform2D to_light_xform; + + to_light_xform[2] = from_pos; + to_light_xform[1] = light_dir; + to_light_xform[0] = -light_dir.orthogonal(); + + to_light_xform.invert(); + + Vector<Color> cc; + cc.push_back(Color(1, 1, 1, 1)); + + Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, cc, 1.0, 0, rect); + + CameraMatrix projection; + projection.set_orthogonal(-half_size, half_size, -0.5, 0.5, 0.0, distance); + projection = projection * CameraMatrix(Transform().looking_at(Vector3(0, 1, 0), Vector3(0, 0, -1)).affine_inverse()); + + ShadowRenderPushConstant push_constant; + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + push_constant.projection[y * 4 + x] = projection.matrix[y][x]; + } + } + + push_constant.direction[0] = 0.0; + push_constant.direction[1] = 1.0; + push_constant.z_far = distance; + push_constant.pad = 0; + + LightOccluderInstance *instance = p_occluders; + + while (instance) { + OccluderPolygon *co = occluder_polygon_owner.getornull(instance->occluder); + + if (!co || co->index_array.is_null() || !(p_light_mask & instance->light_mask)) { + instance = instance->next; + continue; + } + + _update_transform_2d_to_mat2x4(to_light_xform * instance->xform_cache, push_constant.modelview); + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[co->cull_mode]); + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant)); + + RD::get_singleton()->draw_list_draw(draw_list, true); + + instance = instance->next; + } + + RD::get_singleton()->draw_list_end(); + + Transform2D to_shadow; + to_shadow.elements[0].x = 1.0 / -(half_size * 2.0); + to_shadow.elements[2].x = 0.5; + + cl->shadow.directional_xform = to_shadow * to_light_xform; +} + +void RendererCanvasRenderRD::render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) { + RID fb = storage->render_target_get_sdf_framebuffer(p_render_target); + Rect2i rect = storage->render_target_get_sdf_rect(p_render_target); + + Transform2D to_sdf; + to_sdf.elements[0] *= rect.size.width; + to_sdf.elements[1] *= rect.size.height; + to_sdf.elements[2] = rect.position; + + Transform2D to_clip; + to_clip.elements[0] *= 2.0; + to_clip.elements[1] *= 2.0; + to_clip.elements[2] = -Vector2(1.0, 1.0); + + to_clip = to_clip * to_sdf.affine_inverse(); + + Vector<Color> cc; + cc.push_back(Color(0, 0, 0, 0)); + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, cc); + + CameraMatrix projection; + + ShadowRenderPushConstant push_constant; + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + push_constant.projection[y * 4 + x] = projection.matrix[y][x]; + } + } + + push_constant.direction[0] = 0.0; + push_constant.direction[1] = 0.0; + push_constant.z_far = 0; + push_constant.pad = 0; + + LightOccluderInstance *instance = p_occluders; + + while (instance) { + OccluderPolygon *co = occluder_polygon_owner.getornull(instance->occluder); + + if (!co || co->sdf_index_array.is_null()) { + instance = instance->next; + continue; + } + + _update_transform_2d_to_mat2x4(to_clip * instance->xform_cache, push_constant.modelview); + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.sdf_render_pipelines[co->sdf_is_lines ? SHADOW_RENDER_SDF_LINES : SHADOW_RENDER_SDF_TRIANGLES]); + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->sdf_vertex_array); + RD::get_singleton()->draw_list_bind_index_array(draw_list, co->sdf_index_array); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant)); + + RD::get_singleton()->draw_list_draw(draw_list, true); + + instance = instance->next; + } + + RD::get_singleton()->draw_list_end(); + + storage->render_target_sdf_process(p_render_target); //done rendering, process it +} + +RID RendererCanvasRenderRD::occluder_polygon_create() { + OccluderPolygon occluder; + occluder.line_point_count = 0; + occluder.sdf_point_count = 0; + occluder.sdf_index_count = 0; + occluder.cull_mode = RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; + return occluder_polygon_owner.make_rid(occluder); +} + +void RendererCanvasRenderRD::occluder_polygon_set_shape(RID p_occluder, const Vector<Vector2> &p_points, bool p_closed) { + OccluderPolygon *oc = occluder_polygon_owner.getornull(p_occluder); + ERR_FAIL_COND(!oc); + + Vector<Vector2> lines; + + if (p_points.size()) { + int lc = p_points.size() * 2; + + lines.resize(lc - (p_closed ? 0 : 2)); + { + Vector2 *w = lines.ptrw(); + const Vector2 *r = p_points.ptr(); + + int max = lc / 2; + if (!p_closed) { + max--; + } + for (int i = 0; i < max; i++) { + Vector2 a = r[i]; + Vector2 b = r[(i + 1) % (lc / 2)]; + w[i * 2 + 0] = a; + w[i * 2 + 1] = b; + } + } + } + + if (oc->line_point_count != lines.size() && oc->vertex_array.is_valid()) { + RD::get_singleton()->free(oc->vertex_array); + RD::get_singleton()->free(oc->vertex_buffer); + RD::get_singleton()->free(oc->index_array); + RD::get_singleton()->free(oc->index_buffer); + + oc->vertex_array = RID(); + oc->vertex_buffer = RID(); + oc->index_array = RID(); + oc->index_buffer = RID(); + + oc->line_point_count = lines.size(); + } + + if (lines.size()) { + Vector<uint8_t> geometry; + Vector<uint8_t> indices; + int lc = lines.size(); + + geometry.resize(lc * 6 * sizeof(float)); + indices.resize(lc * 3 * sizeof(uint16_t)); + + { + uint8_t *vw = geometry.ptrw(); + float *vwptr = (float *)vw; + uint8_t *iw = indices.ptrw(); + uint16_t *iwptr = (uint16_t *)iw; + + const Vector2 *lr = lines.ptr(); + + const int POLY_HEIGHT = 16384; + + for (int i = 0; i < lc / 2; i++) { + vwptr[i * 12 + 0] = lr[i * 2 + 0].x; + vwptr[i * 12 + 1] = lr[i * 2 + 0].y; + vwptr[i * 12 + 2] = POLY_HEIGHT; + + vwptr[i * 12 + 3] = lr[i * 2 + 1].x; + vwptr[i * 12 + 4] = lr[i * 2 + 1].y; + vwptr[i * 12 + 5] = POLY_HEIGHT; + + vwptr[i * 12 + 6] = lr[i * 2 + 1].x; + vwptr[i * 12 + 7] = lr[i * 2 + 1].y; + vwptr[i * 12 + 8] = -POLY_HEIGHT; + + vwptr[i * 12 + 9] = lr[i * 2 + 0].x; + vwptr[i * 12 + 10] = lr[i * 2 + 0].y; + vwptr[i * 12 + 11] = -POLY_HEIGHT; + + iwptr[i * 6 + 0] = i * 4 + 0; + iwptr[i * 6 + 1] = i * 4 + 1; + iwptr[i * 6 + 2] = i * 4 + 2; + + iwptr[i * 6 + 3] = i * 4 + 2; + iwptr[i * 6 + 4] = i * 4 + 3; + iwptr[i * 6 + 5] = i * 4 + 0; + } + } + + //if same buffer len is being set, just use BufferSubData to avoid a pipeline flush + + if (oc->vertex_array.is_null()) { + //create from scratch + //vertices + oc->vertex_buffer = RD::get_singleton()->vertex_buffer_create(lc * 6 * sizeof(real_t), geometry); + + Vector<RID> buffer; + buffer.push_back(oc->vertex_buffer); + oc->vertex_array = RD::get_singleton()->vertex_array_create(4 * lc / 2, shadow_render.vertex_format, buffer); + //indices + + oc->index_buffer = RD::get_singleton()->index_buffer_create(3 * lc, RD::INDEX_BUFFER_FORMAT_UINT16, indices); + oc->index_array = RD::get_singleton()->index_array_create(oc->index_buffer, 0, 3 * lc); + + } else { + //update existing + const uint8_t *vr = geometry.ptr(); + RD::get_singleton()->buffer_update(oc->vertex_buffer, 0, geometry.size(), vr); + const uint8_t *ir = indices.ptr(); + RD::get_singleton()->buffer_update(oc->index_buffer, 0, indices.size(), ir); + } + } + + // sdf + + Vector<int> sdf_indices; + + if (p_points.size()) { + if (p_closed) { + sdf_indices = Geometry2D::triangulate_polygon(p_points); + oc->sdf_is_lines = false; + } else { + int max = p_points.size(); + sdf_indices.resize(max * 2); + + int *iw = sdf_indices.ptrw(); + for (int i = 0; i < max; i++) { + iw[i * 2 + 0] = i; + iw[i * 2 + 1] = (i + 1) % max; + } + oc->sdf_is_lines = true; + } + } + + if (oc->sdf_index_count != sdf_indices.size() && oc->sdf_point_count != p_points.size() && oc->sdf_vertex_array.is_valid()) { + RD::get_singleton()->free(oc->sdf_vertex_array); + RD::get_singleton()->free(oc->sdf_vertex_buffer); + RD::get_singleton()->free(oc->sdf_index_array); + RD::get_singleton()->free(oc->sdf_index_buffer); + + oc->sdf_vertex_array = RID(); + oc->sdf_vertex_buffer = RID(); + oc->sdf_index_array = RID(); + oc->sdf_index_buffer = RID(); + + oc->sdf_index_count = sdf_indices.size(); + oc->sdf_point_count = p_points.size(); + + oc->sdf_is_lines = false; + } + + if (sdf_indices.size()) { + if (oc->sdf_vertex_array.is_null()) { + //create from scratch + //vertices + oc->sdf_vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_points.size() * 2 * sizeof(real_t), p_points.to_byte_array()); + oc->sdf_index_buffer = RD::get_singleton()->index_buffer_create(sdf_indices.size(), RD::INDEX_BUFFER_FORMAT_UINT32, sdf_indices.to_byte_array()); + oc->sdf_index_array = RD::get_singleton()->index_array_create(oc->sdf_index_buffer, 0, sdf_indices.size()); + + Vector<RID> buffer; + buffer.push_back(oc->sdf_vertex_buffer); + oc->sdf_vertex_array = RD::get_singleton()->vertex_array_create(p_points.size(), shadow_render.sdf_vertex_format, buffer); + //indices + + } else { + //update existing + RD::get_singleton()->buffer_update(oc->vertex_buffer, 0, sizeof(real_t) * 2 * p_points.size(), p_points.ptr()); + RD::get_singleton()->buffer_update(oc->index_buffer, 0, sdf_indices.size() * sizeof(int32_t), sdf_indices.ptr()); + } + } +} + +void RendererCanvasRenderRD::occluder_polygon_set_cull_mode(RID p_occluder, RS::CanvasOccluderPolygonCullMode p_mode) { + OccluderPolygon *oc = occluder_polygon_owner.getornull(p_occluder); + ERR_FAIL_COND(!oc); + oc->cull_mode = p_mode; +} + +void RendererCanvasRenderRD::ShaderData::set_code(const String &p_code) { + //compile + + code = p_code; + valid = false; + ubo_size = 0; + uniforms.clear(); + uses_screen_texture = false; + uses_sdf = false; + + if (code == String()) { + return; //just invalid, but no error + } + + ShaderCompilerRD::GeneratedCode gen_code; + + int blend_mode = BLEND_MODE_MIX; + uses_screen_texture = false; + + ShaderCompilerRD::IdentifierActions actions; + + actions.render_mode_values["blend_add"] = Pair<int *, int>(&blend_mode, BLEND_MODE_ADD); + actions.render_mode_values["blend_mix"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MIX); + actions.render_mode_values["blend_sub"] = Pair<int *, int>(&blend_mode, BLEND_MODE_SUB); + actions.render_mode_values["blend_mul"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MUL); + actions.render_mode_values["blend_premul_alpha"] = Pair<int *, int>(&blend_mode, BLEND_MODE_PMALPHA); + actions.render_mode_values["blend_disabled"] = Pair<int *, int>(&blend_mode, BLEND_MODE_DISABLED); + + actions.usage_flag_pointers["SCREEN_TEXTURE"] = &uses_screen_texture; + actions.usage_flag_pointers["texture_sdf"] = &uses_sdf; + + actions.uniforms = &uniforms; + + RendererCanvasRenderRD *canvas_singleton = (RendererCanvasRenderRD *)RendererCanvasRender::singleton; + + Error err = canvas_singleton->shader.compiler.compile(RS::SHADER_CANVAS_ITEM, code, &actions, path, gen_code); + + ERR_FAIL_COND(err != OK); + + if (version.is_null()) { + version = canvas_singleton->shader.canvas_shader.version_create(); + } + +#if 0 + print_line("**compiling shader:"); + print_line("**defines:\n"); + for (int i = 0; i < gen_code.defines.size(); i++) { + print_line(gen_code.defines[i]); + } + print_line("\n**uniforms:\n" + gen_code.uniforms); + print_line("\n**vertex_globals:\n" + gen_code.vertex_global); + print_line("\n**vertex_code:\n" + gen_code.vertex); + print_line("\n**fragment_globals:\n" + gen_code.fragment_global); + print_line("\n**fragment_code:\n" + gen_code.fragment); + print_line("\n**light_code:\n" + gen_code.light); +#endif + canvas_singleton->shader.canvas_shader.version_set_code(version, gen_code.uniforms, gen_code.vertex_global, gen_code.vertex, gen_code.fragment_global, gen_code.light, gen_code.fragment, gen_code.defines); + ERR_FAIL_COND(!canvas_singleton->shader.canvas_shader.version_is_valid(version)); + + ubo_size = gen_code.uniform_total_size; + ubo_offsets = gen_code.uniform_offsets; + texture_uniforms = gen_code.texture_uniforms; + + //update them pipelines + + RD::PipelineColorBlendState::Attachment attachment; + + switch (blend_mode) { + case BLEND_MODE_DISABLED: { + // nothing to do here, disabled by default + + } break; + case BLEND_MODE_MIX: { + attachment.enable_blend = true; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + + } break; + case BLEND_MODE_ADD: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + + } break; + case BLEND_MODE_SUB: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_SUBTRACT; + attachment.color_blend_op = RD::BLEND_OP_SUBTRACT; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + + } break; + case BLEND_MODE_MUL: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_DST_COLOR; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ZERO; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_DST_ALPHA; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; + + } break; + case BLEND_MODE_PMALPHA: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + + } break; + } + + RD::PipelineColorBlendState blend_state; + blend_state.attachments.push_back(attachment); + + //update pipelines + + for (int i = 0; i < PIPELINE_LIGHT_MODE_MAX; i++) { + for (int j = 0; j < PIPELINE_VARIANT_MAX; j++) { + RD::RenderPrimitive primitive[PIPELINE_VARIANT_MAX] = { + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_LINES, + RD::RENDER_PRIMITIVE_POINTS, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, + RD::RENDER_PRIMITIVE_LINES, + RD::RENDER_PRIMITIVE_LINESTRIPS, + RD::RENDER_PRIMITIVE_POINTS, + }; + + ShaderVariant shader_variants[PIPELINE_LIGHT_MODE_MAX][PIPELINE_VARIANT_MAX] = { + { //non lit + SHADER_VARIANT_QUAD, + SHADER_VARIANT_NINEPATCH, + SHADER_VARIANT_PRIMITIVE, + SHADER_VARIANT_PRIMITIVE, + SHADER_VARIANT_PRIMITIVE_POINTS, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES_POINTS }, + { //lit + SHADER_VARIANT_QUAD_LIGHT, + SHADER_VARIANT_NINEPATCH_LIGHT, + SHADER_VARIANT_PRIMITIVE_LIGHT, + SHADER_VARIANT_PRIMITIVE_LIGHT, + SHADER_VARIANT_PRIMITIVE_POINTS_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_POINTS_LIGHT }, + }; + + RID shader_variant = canvas_singleton->shader.canvas_shader.version_get_shader(version, shader_variants[i][j]); + pipeline_variants.variants[i][j].setup(shader_variant, primitive[j], RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); + } + } + + valid = true; +} + +void RendererCanvasRenderRD::ShaderData::set_default_texture_param(const StringName &p_name, RID p_texture) { + if (!p_texture.is_valid()) { + default_texture_params.erase(p_name); + } else { + default_texture_params[p_name] = p_texture; + } +} + +void RendererCanvasRenderRD::ShaderData::get_param_list(List<PropertyInfo> *p_param_list) const { + Map<int, StringName> order; + + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_LOCAL) { + continue; + } + if (E->get().texture_order >= 0) { + order[E->get().texture_order + 100000] = E->key(); + } else { + order[E->get().order] = E->key(); + } + } + + for (Map<int, StringName>::Element *E = order.front(); E; E = E->next()) { + PropertyInfo pi = ShaderLanguage::uniform_to_property_info(uniforms[E->get()]); + pi.name = E->get(); + p_param_list->push_back(pi); + } +} + +void RendererCanvasRenderRD::ShaderData::get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const { + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + RendererStorage::InstanceShaderParam p; + p.info = ShaderLanguage::uniform_to_property_info(E->get()); + p.info.name = E->key(); //supply name + p.index = E->get().instance_index; + p.default_value = ShaderLanguage::constant_value_to_variant(E->get().default_value, E->get().type, E->get().hint); + p_param_list->push_back(p); + } +} + +bool RendererCanvasRenderRD::ShaderData::is_param_texture(const StringName &p_param) const { + if (!uniforms.has(p_param)) { + return false; + } + + return uniforms[p_param].texture_order >= 0; +} + +bool RendererCanvasRenderRD::ShaderData::is_animated() const { + return false; +} + +bool RendererCanvasRenderRD::ShaderData::casts_shadows() const { + return false; +} + +Variant RendererCanvasRenderRD::ShaderData::get_default_parameter(const StringName &p_parameter) const { + if (uniforms.has(p_parameter)) { + ShaderLanguage::ShaderNode::Uniform uniform = uniforms[p_parameter]; + Vector<ShaderLanguage::ConstantNode::Value> default_value = uniform.default_value; + return ShaderLanguage::constant_value_to_variant(default_value, uniform.type, uniform.hint); + } + return Variant(); +} + +RS::ShaderNativeSourceCode RendererCanvasRenderRD::ShaderData::get_native_source_code() const { + RendererCanvasRenderRD *canvas_singleton = (RendererCanvasRenderRD *)RendererCanvasRender::singleton; + return canvas_singleton->shader.canvas_shader.version_get_native_source_code(version); +} + +RendererCanvasRenderRD::ShaderData::ShaderData() { + valid = false; + uses_screen_texture = false; + uses_sdf = false; +} + +RendererCanvasRenderRD::ShaderData::~ShaderData() { + RendererCanvasRenderRD *canvas_singleton = (RendererCanvasRenderRD *)RendererCanvasRender::singleton; + ERR_FAIL_COND(!canvas_singleton); + //pipeline variants will clear themselves if shader is gone + if (version.is_valid()) { + canvas_singleton->shader.canvas_shader.version_free(version); + } +} + +RendererStorageRD::ShaderData *RendererCanvasRenderRD::_create_shader_func() { + ShaderData *shader_data = memnew(ShaderData); + return shader_data; +} + +void RendererCanvasRenderRD::MaterialData::update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { + RendererCanvasRenderRD *canvas_singleton = (RendererCanvasRenderRD *)RendererCanvasRender::singleton; + + if ((uint32_t)ubo_data.size() != shader_data->ubo_size) { + p_uniform_dirty = true; + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + uniform_buffer = RID(); + } + + ubo_data.resize(shader_data->ubo_size); + if (ubo_data.size()) { + uniform_buffer = RD::get_singleton()->uniform_buffer_create(ubo_data.size()); + memset(ubo_data.ptrw(), 0, ubo_data.size()); //clear + } + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + //check whether buffer changed + if (p_uniform_dirty && ubo_data.size()) { + update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); + RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); + } + + uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); + + if ((uint32_t)texture_cache.size() != tex_uniform_count) { + texture_cache.resize(tex_uniform_count); + p_textures_dirty = true; + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + if (p_textures_dirty && tex_uniform_count) { + update_textures(p_parameters, shader_data->default_texture_params, shader_data->texture_uniforms, texture_cache.ptrw(), false); + } + + if (shader_data->ubo_size == 0) { + // This material does not require an uniform set, so don't create it. + return; + } + + if (!p_textures_dirty && uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + //no reason to update uniform set, only UBO (or nothing) was needed to update + return; + } + + Vector<RD::Uniform> uniforms; + + { + if (shader_data->ubo_size) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 0; + u.ids.push_back(uniform_buffer); + uniforms.push_back(u); + } + + const RID *textures = texture_cache.ptrw(); + for (uint32_t i = 0; i < tex_uniform_count; i++) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1 + i; + u.ids.push_back(textures[i]); + uniforms.push_back(u); + } + } + + uniform_set = RD::get_singleton()->uniform_set_create(uniforms, canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET); +} + +RendererCanvasRenderRD::MaterialData::~MaterialData() { + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + } + + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + } +} + +RendererStorageRD::MaterialData *RendererCanvasRenderRD::_create_material_func(ShaderData *p_shader) { + MaterialData *material_data = memnew(MaterialData); + material_data->shader_data = p_shader; + material_data->last_frame = false; + //update will happen later anyway so do nothing. + return material_data; +} + +void RendererCanvasRenderRD::set_time(double p_time) { + state.time = p_time; +} + +void RendererCanvasRenderRD::update() { +} + +RendererCanvasRenderRD::RendererCanvasRenderRD(RendererStorageRD *p_storage) { + storage = p_storage; + + { //create default samplers + + default_samplers.default_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR; + default_samplers.default_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED; + } + + { //shader variants + + String global_defines; + + uint32_t uniform_max_size = RD::get_singleton()->limit_get(RD::LIMIT_MAX_UNIFORM_BUFFER_SIZE); + if (uniform_max_size < 65536) { + //Yes, you guessed right, ARM again + state.max_lights_per_render = 64; + global_defines += "#define MAX_LIGHTS 64\n"; + } else { + state.max_lights_per_render = DEFAULT_MAX_LIGHTS_PER_RENDER; + global_defines += "#define MAX_LIGHTS " + itos(DEFAULT_MAX_LIGHTS_PER_RENDER) + "\n"; + } + + state.light_uniforms = memnew_arr(LightUniform, state.max_lights_per_render); + Vector<String> variants; + //non light variants + variants.push_back(""); //none by default is first variant + variants.push_back("#define USE_NINEPATCH\n"); //ninepatch is the second variant + variants.push_back("#define USE_PRIMITIVE\n"); //primitive is the third + variants.push_back("#define USE_PRIMITIVE\n#define USE_POINT_SIZE\n"); //points need point size + variants.push_back("#define USE_ATTRIBUTES\n"); // attributes for vertex arrays + variants.push_back("#define USE_ATTRIBUTES\n#define USE_POINT_SIZE\n"); //attributes with point size + //light variants + variants.push_back("#define USE_LIGHTING\n"); //none by default is first variant + variants.push_back("#define USE_LIGHTING\n#define USE_NINEPATCH\n"); //ninepatch is the second variant + variants.push_back("#define USE_LIGHTING\n#define USE_PRIMITIVE\n"); //primitive is the third + variants.push_back("#define USE_LIGHTING\n#define USE_PRIMITIVE\n#define USE_POINT_SIZE\n"); //points need point size + variants.push_back("#define USE_LIGHTING\n#define USE_ATTRIBUTES\n"); // attributes for vertex arrays + variants.push_back("#define USE_LIGHTING\n#define USE_ATTRIBUTES\n#define USE_POINT_SIZE\n"); //attributes with point size + + shader.canvas_shader.initialize(variants, global_defines); + + shader.default_version = shader.canvas_shader.version_create(); + shader.default_version_rd_shader = shader.canvas_shader.version_get_shader(shader.default_version, SHADER_VARIANT_QUAD); + + RD::PipelineColorBlendState blend_state; + RD::PipelineColorBlendState::Attachment blend_attachment; + + blend_attachment.enable_blend = true; + blend_attachment.color_blend_op = RD::BLEND_OP_ADD; + blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + + blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; + blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + + blend_state.attachments.push_back(blend_attachment); + + for (int i = 0; i < PIPELINE_LIGHT_MODE_MAX; i++) { + for (int j = 0; j < PIPELINE_VARIANT_MAX; j++) { + RD::RenderPrimitive primitive[PIPELINE_VARIANT_MAX] = { + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_LINES, + RD::RENDER_PRIMITIVE_POINTS, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, + RD::RENDER_PRIMITIVE_LINES, + RD::RENDER_PRIMITIVE_LINESTRIPS, + RD::RENDER_PRIMITIVE_POINTS, + }; + + ShaderVariant shader_variants[PIPELINE_LIGHT_MODE_MAX][PIPELINE_VARIANT_MAX] = { + { //non lit + SHADER_VARIANT_QUAD, + SHADER_VARIANT_NINEPATCH, + SHADER_VARIANT_PRIMITIVE, + SHADER_VARIANT_PRIMITIVE, + SHADER_VARIANT_PRIMITIVE_POINTS, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES_POINTS }, + { //lit + SHADER_VARIANT_QUAD_LIGHT, + SHADER_VARIANT_NINEPATCH_LIGHT, + SHADER_VARIANT_PRIMITIVE_LIGHT, + SHADER_VARIANT_PRIMITIVE_LIGHT, + SHADER_VARIANT_PRIMITIVE_POINTS_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_POINTS_LIGHT }, + }; + + RID shader_variant = shader.canvas_shader.version_get_shader(shader.default_version, shader_variants[i][j]); + shader.pipeline_variants.variants[i][j].setup(shader_variant, primitive[j], RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); + } + } + } + + { + //shader compiler + ShaderCompilerRD::DefaultIdentifierActions actions; + + actions.renames["VERTEX"] = "vertex"; + actions.renames["LIGHT_VERTEX"] = "light_vertex"; + actions.renames["SHADOW_VERTEX"] = "shadow_vertex"; + actions.renames["UV"] = "uv"; + actions.renames["POINT_SIZE"] = "gl_PointSize"; + + actions.renames["WORLD_MATRIX"] = "world_matrix"; + actions.renames["CANVAS_MATRIX"] = "canvas_data.canvas_transform"; + actions.renames["SCREEN_MATRIX"] = "canvas_data.screen_transform"; + actions.renames["TIME"] = "canvas_data.time"; + actions.renames["AT_LIGHT_PASS"] = "false"; + actions.renames["INSTANCE_CUSTOM"] = "instance_custom"; + + actions.renames["COLOR"] = "color"; + actions.renames["NORMAL"] = "normal"; + actions.renames["NORMAL_MAP"] = "normal_map"; + actions.renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; + actions.renames["TEXTURE"] = "color_texture"; + actions.renames["TEXTURE_PIXEL_SIZE"] = "draw_data.color_texture_pixel_size"; + actions.renames["NORMAL_TEXTURE"] = "normal_texture"; + actions.renames["SPECULAR_SHININESS_TEXTURE"] = "specular_texture"; + actions.renames["SPECULAR_SHININESS"] = "specular_shininess"; + actions.renames["SCREEN_UV"] = "screen_uv"; + actions.renames["SCREEN_TEXTURE"] = "screen_texture"; + actions.renames["SCREEN_PIXEL_SIZE"] = "canvas_data.screen_pixel_size"; + actions.renames["FRAGCOORD"] = "gl_FragCoord"; + actions.renames["POINT_COORD"] = "gl_PointCoord"; + + actions.renames["LIGHT_POSITION"] = "light_position"; + actions.renames["LIGHT_COLOR"] = "light_color"; + actions.renames["LIGHT_ENERGY"] = "light_energy"; + actions.renames["LIGHT"] = "light"; + actions.renames["SHADOW_MODULATE"] = "shadow_modulate"; + + actions.renames["texture_sdf"] = "texture_sdf"; + actions.renames["texture_sdf_normal"] = "texture_sdf_normal"; + actions.renames["sdf_to_screen_uv"] = "sdf_to_screen_uv"; + actions.renames["screen_uv_to_sdf"] = "screen_uv_to_sdf"; + + actions.usage_defines["COLOR"] = "#define COLOR_USED\n"; + actions.usage_defines["SCREEN_TEXTURE"] = "#define SCREEN_TEXTURE_USED\n"; + actions.usage_defines["SCREEN_UV"] = "#define SCREEN_UV_USED\n"; + actions.usage_defines["SCREEN_PIXEL_SIZE"] = "@SCREEN_UV"; + actions.usage_defines["NORMAL"] = "#define NORMAL_USED\n"; + actions.usage_defines["NORMAL_MAP"] = "#define NORMAL_MAP_USED\n"; + actions.usage_defines["LIGHT"] = "#define LIGHT_SHADER_CODE_USED\n"; + + actions.render_mode_defines["skip_vertex_transform"] = "#define SKIP_TRANSFORM_USED\n"; + actions.render_mode_defines["unshaded"] = "#define MODE_UNSHADED\n"; + actions.render_mode_defines["light_only"] = "#define MODE_LIGHT_ONLY\n"; + + actions.custom_samplers["TEXTURE"] = "texture_sampler"; + actions.custom_samplers["NORMAL_TEXTURE"] = "texture_sampler"; + actions.custom_samplers["SPECULAR_SHININESS_TEXTURE"] = "texture_sampler"; + actions.custom_samplers["SCREEN_TEXTURE"] = "material_samplers[3]"; //mipmap and filter for screen texture + actions.sampler_array_name = "material_samplers"; + actions.base_texture_binding_index = 1; + actions.texture_layout_set = MATERIAL_UNIFORM_SET; + actions.base_uniform_string = "material."; + actions.default_filter = ShaderLanguage::FILTER_LINEAR; + actions.default_repeat = ShaderLanguage::REPEAT_DISABLE; + actions.base_varying_index = 4; + + actions.global_buffer_array_variable = "global_variables.data"; + + shader.compiler.initialize(actions); + } + + { //shadow rendering + Vector<String> versions; + versions.push_back("\n#define MODE_SHADOW\n"); //shadow + versions.push_back("\n#define MODE_SDF\n"); //sdf + shadow_render.shader.initialize(versions); + + { + Vector<RD::AttachmentFormat> attachments; + + RD::AttachmentFormat af_color; + af_color.format = RD::DATA_FORMAT_R32_SFLOAT; + af_color.usage_flags = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + + attachments.push_back(af_color); + + RD::AttachmentFormat af_depth; + af_depth.format = RD::DATA_FORMAT_D32_SFLOAT; + af_depth.usage_flags = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + attachments.push_back(af_depth); + + shadow_render.framebuffer_format = RD::get_singleton()->framebuffer_format_create(attachments); + } + + { + Vector<RD::AttachmentFormat> attachments; + + RD::AttachmentFormat af_color; + af_color.format = RD::DATA_FORMAT_R8_UNORM; + af_color.usage_flags = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + + attachments.push_back(af_color); + + shadow_render.sdf_framebuffer_format = RD::get_singleton()->framebuffer_format_create(attachments); + } + + //pipelines + Vector<RD::VertexAttribute> vf; + RD::VertexAttribute vd; + vd.format = sizeof(real_t) == sizeof(float) ? RD::DATA_FORMAT_R32G32B32_SFLOAT : RD::DATA_FORMAT_R64G64B64_SFLOAT; + vd.location = 0; + vd.offset = 0; + vd.stride = sizeof(real_t) * 3; + vf.push_back(vd); + shadow_render.vertex_format = RD::get_singleton()->vertex_format_create(vf); + + vd.format = sizeof(real_t) == sizeof(float) ? RD::DATA_FORMAT_R32G32_SFLOAT : RD::DATA_FORMAT_R64G64_SFLOAT; + vd.stride = sizeof(real_t) * 2; + + vf.write[0] = vd; + shadow_render.sdf_vertex_format = RD::get_singleton()->vertex_format_create(vf); + + shadow_render.shader_version = shadow_render.shader.version_create(); + + for (int i = 0; i < 3; i++) { + RD::PipelineRasterizationState rs; + rs.cull_mode = i == 0 ? RD::POLYGON_CULL_DISABLED : (i == 1 ? RD::POLYGON_CULL_FRONT : RD::POLYGON_CULL_BACK); + RD::PipelineDepthStencilState ds; + ds.enable_depth_write = true; + ds.enable_depth_test = true; + ds.depth_compare_operator = RD::COMPARE_OP_LESS; + shadow_render.render_pipelines[i] = RD::get_singleton()->render_pipeline_create(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SHADOW), shadow_render.framebuffer_format, shadow_render.vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); + } + + for (int i = 0; i < 2; i++) { + shadow_render.sdf_render_pipelines[i] = RD::get_singleton()->render_pipeline_create(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SDF), shadow_render.sdf_framebuffer_format, shadow_render.sdf_vertex_format, i == 0 ? RD::RENDER_PRIMITIVE_TRIANGLES : RD::RENDER_PRIMITIVE_LINES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState::create_disabled(), 0); + } + } + + { //bindings + + state.canvas_state_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(State::Buffer)); + state.lights_uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(LightUniform) * state.max_lights_per_render); + + RD::SamplerState shadow_sampler_state; + shadow_sampler_state.mag_filter = RD::SAMPLER_FILTER_LINEAR; + shadow_sampler_state.min_filter = RD::SAMPLER_FILTER_LINEAR; + shadow_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_REPEAT; //shadow wrap around + shadow_sampler_state.compare_op = RD::COMPARE_OP_GREATER; + shadow_sampler_state.enable_compare = true; + state.shadow_sampler = RD::get_singleton()->sampler_create(shadow_sampler_state); + } + + { + //polygon buffers + polygon_buffers.last_id = 1; + } + + { // default index buffer + + Vector<uint8_t> pv; + pv.resize(6 * 4); + { + uint8_t *w = pv.ptrw(); + int *p32 = (int *)w; + p32[0] = 0; + p32[1] = 1; + p32[2] = 2; + p32[3] = 0; + p32[4] = 2; + p32[5] = 3; + } + shader.quad_index_buffer = RD::get_singleton()->index_buffer_create(6, RenderingDevice::INDEX_BUFFER_FORMAT_UINT32, pv); + shader.quad_index_array = RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 6); + } + + { //primitive + primitive_arrays.index_array[0] = shader.quad_index_array = RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 1); + primitive_arrays.index_array[1] = shader.quad_index_array = RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 2); + primitive_arrays.index_array[2] = shader.quad_index_array = RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 3); + primitive_arrays.index_array[3] = shader.quad_index_array = RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 6); + } + + { //default skeleton buffer + + shader.default_skeleton_uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SkeletonUniform)); + SkeletonUniform su; + _update_transform_2d_to_mat4(Transform2D(), su.skeleton_inverse); + _update_transform_2d_to_mat4(Transform2D(), su.skeleton_transform); + RD::get_singleton()->buffer_update(shader.default_skeleton_uniform_buffer, 0, sizeof(SkeletonUniform), &su); + + shader.default_skeleton_texture_buffer = RD::get_singleton()->texture_buffer_create(32, RD::DATA_FORMAT_R32G32B32A32_SFLOAT); + } + { + //default shadow texture to keep uniform set happy + RD::TextureFormat tf; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.width = 4; + tf.height = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + + state.shadow_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + + { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(storage->get_default_rd_storage_buffer()); + uniforms.push_back(u); + } + + state.default_transforms_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shader.default_version_rd_shader, TRANSFORMS_UNIFORM_SET); + } + + default_canvas_texture = storage->canvas_texture_allocate(); + storage->canvas_texture_initialize(default_canvas_texture); + + state.shadow_texture_size = GLOBAL_GET("rendering/2d/shadow_atlas/size"); + + //create functions for shader and material + storage->shader_set_data_request_function(RendererStorageRD::SHADER_TYPE_2D, _create_shader_funcs); + storage->material_set_data_request_function(RendererStorageRD::SHADER_TYPE_2D, _create_material_funcs); + + state.time = 0; + + { + default_canvas_group_shader = storage->shader_allocate(); + storage->shader_initialize(default_canvas_group_shader); + + storage->shader_set_code(default_canvas_group_shader, "shader_type canvas_item; \nvoid fragment() {\n\tvec4 c = textureLod(SCREEN_TEXTURE,SCREEN_UV,0.0); if (c.a > 0.0001) c.rgb/=c.a; COLOR *= c; \n}\n"); + + default_canvas_group_material = storage->material_allocate(); + storage->material_initialize(default_canvas_group_material); + + storage->material_set_shader(default_canvas_group_material, default_canvas_group_shader); + } + + static_assert(sizeof(PushConstant) == 128); +} + +bool RendererCanvasRenderRD::free(RID p_rid) { + if (canvas_light_owner.owns(p_rid)) { + CanvasLight *cl = canvas_light_owner.getornull(p_rid); + ERR_FAIL_COND_V(!cl, false); + light_set_use_shadow(p_rid, false); + canvas_light_owner.free(p_rid); + } else if (occluder_polygon_owner.owns(p_rid)) { + occluder_polygon_set_shape(p_rid, Vector<Vector2>(), false); + occluder_polygon_owner.free(p_rid); + } else { + return false; + } + + return true; +} + +void RendererCanvasRenderRD::set_shadow_texture_size(int p_size) { + p_size = nearest_power_of_2_templated(p_size); + if (p_size == state.shadow_texture_size) { + return; + } + state.shadow_texture_size = p_size; + if (state.shadow_fb.is_valid()) { + RD::get_singleton()->free(state.shadow_texture); + RD::get_singleton()->free(state.shadow_depth_texture); + state.shadow_fb = RID(); + + { + //create a default shadow texture to keep uniform set happy (and that it gets erased when a new one is created) + RD::TextureFormat tf; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.width = 4; + tf.height = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + + state.shadow_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + } +} + +RendererCanvasRenderRD::~RendererCanvasRenderRD() { + //canvas state + + storage->free(default_canvas_group_material); + storage->free(default_canvas_group_shader); + + { + if (state.canvas_state_buffer.is_valid()) { + RD::get_singleton()->free(state.canvas_state_buffer); + } + + memdelete_arr(state.light_uniforms); + RD::get_singleton()->free(state.lights_uniform_buffer); + RD::get_singleton()->free(shader.default_skeleton_uniform_buffer); + RD::get_singleton()->free(shader.default_skeleton_texture_buffer); + } + + //shadow rendering + { + shadow_render.shader.version_free(shadow_render.shader_version); + //this will also automatically clear all pipelines + RD::get_singleton()->free(state.shadow_sampler); + } + //bindings + + //shaders + + shader.canvas_shader.version_free(shader.default_version); + + //buffers + { + RD::get_singleton()->free(shader.quad_index_array); + RD::get_singleton()->free(shader.quad_index_buffer); + //primitives are erase by dependency + } + + if (state.shadow_fb.is_valid()) { + RD::get_singleton()->free(state.shadow_depth_texture); + } + RD::get_singleton()->free(state.shadow_texture); + + storage->free(default_canvas_texture); + //pipelines don't need freeing, they are all gone after shaders are gone +} diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h new file mode 100644 index 0000000000..cb947d7180 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h @@ -0,0 +1,472 @@ +/*************************************************************************/ +/* renderer_canvas_render_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef RENDERING_SERVER_CANVAS_RENDER_RD_H +#define RENDERING_SERVER_CANVAS_RENDER_RD_H + +#include "servers/rendering/renderer_canvas_render.h" +#include "servers/rendering/renderer_compositor.h" +#include "servers/rendering/renderer_rd/pipeline_cache_rd.h" +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/shader_compiler_rd.h" +#include "servers/rendering/renderer_rd/shaders/canvas.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl.gen.h" +#include "servers/rendering/rendering_device.h" + +class RendererCanvasRenderRD : public RendererCanvasRender { + RendererStorageRD *storage; + + enum { + BASE_UNIFORM_SET = 0, + MATERIAL_UNIFORM_SET = 1, + TRANSFORMS_UNIFORM_SET = 2, + CANVAS_TEXTURE_UNIFORM_SET = 3, + }; + + enum ShaderVariant { + SHADER_VARIANT_QUAD, + SHADER_VARIANT_NINEPATCH, + SHADER_VARIANT_PRIMITIVE, + SHADER_VARIANT_PRIMITIVE_POINTS, + SHADER_VARIANT_ATTRIBUTES, + SHADER_VARIANT_ATTRIBUTES_POINTS, + SHADER_VARIANT_QUAD_LIGHT, + SHADER_VARIANT_NINEPATCH_LIGHT, + SHADER_VARIANT_PRIMITIVE_LIGHT, + SHADER_VARIANT_PRIMITIVE_POINTS_LIGHT, + SHADER_VARIANT_ATTRIBUTES_LIGHT, + SHADER_VARIANT_ATTRIBUTES_POINTS_LIGHT, + SHADER_VARIANT_MAX + }; + + enum { + FLAGS_INSTANCING_STRIDE_MASK = 0xF, + FLAGS_INSTANCING_ENABLED = (1 << 4), + FLAGS_INSTANCING_HAS_COLORS = (1 << 5), + FLAGS_INSTANCING_COLOR_8BIT = (1 << 6), + FLAGS_INSTANCING_HAS_CUSTOM_DATA = (1 << 7), + FLAGS_INSTANCING_CUSTOM_DATA_8_BIT = (1 << 8), + + FLAGS_CLIP_RECT_UV = (1 << 9), + FLAGS_TRANSPOSE_RECT = (1 << 10), + + FLAGS_NINEPACH_DRAW_CENTER = (1 << 12), + FLAGS_USING_PARTICLES = (1 << 13), + + FLAGS_USE_SKELETON = (1 << 15), + FLAGS_NINEPATCH_H_MODE_SHIFT = 16, + FLAGS_NINEPATCH_V_MODE_SHIFT = 18, + FLAGS_LIGHT_COUNT_SHIFT = 20, + + FLAGS_DEFAULT_NORMAL_MAP_USED = (1 << 26), + FLAGS_DEFAULT_SPECULAR_MAP_USED = (1 << 27) + + }; + + enum { + LIGHT_FLAGS_TEXTURE_MASK = 0xFFFF, + LIGHT_FLAGS_BLEND_SHIFT = 16, + LIGHT_FLAGS_BLEND_MASK = (3 << 16), + LIGHT_FLAGS_BLEND_MODE_ADD = (0 << 16), + LIGHT_FLAGS_BLEND_MODE_SUB = (1 << 16), + LIGHT_FLAGS_BLEND_MODE_MIX = (2 << 16), + LIGHT_FLAGS_BLEND_MODE_MASK = (3 << 16), + LIGHT_FLAGS_HAS_SHADOW = (1 << 20), + LIGHT_FLAGS_FILTER_SHIFT = 22 + + }; + + enum { + MAX_RENDER_ITEMS = 256 * 1024, + MAX_LIGHT_TEXTURES = 1024, + MAX_LIGHTS_PER_ITEM = 16, + DEFAULT_MAX_LIGHTS_PER_RENDER = 256 + }; + + /****************/ + /**** SHADER ****/ + /****************/ + + enum PipelineVariant { + PIPELINE_VARIANT_QUAD, + PIPELINE_VARIANT_NINEPATCH, + PIPELINE_VARIANT_PRIMITIVE_TRIANGLES, + PIPELINE_VARIANT_PRIMITIVE_LINES, + PIPELINE_VARIANT_PRIMITIVE_POINTS, + PIPELINE_VARIANT_ATTRIBUTE_TRIANGLES, + PIPELINE_VARIANT_ATTRIBUTE_TRIANGLE_STRIP, + PIPELINE_VARIANT_ATTRIBUTE_LINES, + PIPELINE_VARIANT_ATTRIBUTE_LINES_STRIP, + PIPELINE_VARIANT_ATTRIBUTE_POINTS, + PIPELINE_VARIANT_MAX + }; + enum PipelineLightMode { + PIPELINE_LIGHT_MODE_DISABLED, + PIPELINE_LIGHT_MODE_ENABLED, + PIPELINE_LIGHT_MODE_MAX + }; + + struct PipelineVariants { + PipelineCacheRD variants[PIPELINE_LIGHT_MODE_MAX][PIPELINE_VARIANT_MAX]; + }; + + struct { + CanvasShaderRD canvas_shader; + RID default_version; + RID default_version_rd_shader; + RID quad_index_buffer; + RID quad_index_array; + PipelineVariants pipeline_variants; + + // default_skeleton uniform set + RID default_skeleton_uniform_buffer; + RID default_skeleton_texture_buffer; + + ShaderCompilerRD compiler; + } shader; + + struct ShaderData : public RendererStorageRD::ShaderData { + enum BlendMode { //used internally + BLEND_MODE_MIX, + BLEND_MODE_ADD, + BLEND_MODE_SUB, + BLEND_MODE_MUL, + BLEND_MODE_PMALPHA, + BLEND_MODE_DISABLED, + }; + + bool valid; + RID version; + PipelineVariants pipeline_variants; + String path; + + Map<StringName, ShaderLanguage::ShaderNode::Uniform> uniforms; + Vector<ShaderCompilerRD::GeneratedCode::Texture> texture_uniforms; + + Vector<uint32_t> ubo_offsets; + uint32_t ubo_size; + + String code; + Map<StringName, RID> default_texture_params; + + bool uses_screen_texture = false; + bool uses_sdf = false; + + virtual void set_code(const String &p_Code); + virtual void set_default_texture_param(const StringName &p_name, RID p_texture); + virtual void get_param_list(List<PropertyInfo> *p_param_list) const; + virtual void get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const; + + virtual bool is_param_texture(const StringName &p_param) const; + virtual bool is_animated() const; + virtual bool casts_shadows() const; + virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + + ShaderData(); + virtual ~ShaderData(); + }; + + RendererStorageRD::ShaderData *_create_shader_func(); + static RendererStorageRD::ShaderData *_create_shader_funcs() { + return static_cast<RendererCanvasRenderRD *>(singleton)->_create_shader_func(); + } + + struct MaterialData : public RendererStorageRD::MaterialData { + uint64_t last_frame; + ShaderData *shader_data; + RID uniform_buffer; + RID uniform_set; + Vector<RID> texture_cache; + Vector<uint8_t> ubo_data; + + virtual void set_render_priority(int p_priority) {} + virtual void set_next_pass(RID p_pass) {} + virtual void update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty); + virtual ~MaterialData(); + }; + + RendererStorageRD::MaterialData *_create_material_func(ShaderData *p_shader); + static RendererStorageRD::MaterialData *_create_material_funcs(RendererStorageRD::ShaderData *p_shader) { + return static_cast<RendererCanvasRenderRD *>(singleton)->_create_material_func(static_cast<ShaderData *>(p_shader)); + } + + /**************************/ + /**** CANVAS TEXTURES *****/ + /**************************/ + + struct { + RS::CanvasItemTextureFilter default_filter; + RS::CanvasItemTextureRepeat default_repeat; + } default_samplers; + + /******************/ + /**** POLYGONS ****/ + /******************/ + + struct PolygonBuffers { + RD::VertexFormatID vertex_format_id; + RID vertex_buffer; + RID vertex_array; + RID index_buffer; + RID indices; + }; + + struct { + HashMap<PolygonID, PolygonBuffers> polygons; + PolygonID last_id; + } polygon_buffers; + + /********************/ + /**** PRIMITIVES ****/ + /********************/ + + struct { + RID index_array[4]; + } primitive_arrays; + + /*******************/ + /**** MATERIALS ****/ + /*******************/ + + /******************/ + /**** LIGHTING ****/ + /******************/ + + struct CanvasLight { + RID texture; + struct { + bool enabled = false; + float z_far; + float y_offset; + Transform2D directional_xform; + } shadow; + }; + + RID_Owner<CanvasLight> canvas_light_owner; + + struct ShadowRenderPushConstant { + float projection[16]; + float modelview[8]; + float direction[2]; + float z_far; + float pad; + }; + + struct OccluderPolygon { + RS::CanvasOccluderPolygonCullMode cull_mode; + int line_point_count; + RID vertex_buffer; + RID vertex_array; + RID index_buffer; + RID index_array; + + int sdf_point_count; + int sdf_index_count; + RID sdf_vertex_buffer; + RID sdf_vertex_array; + RID sdf_index_buffer; + RID sdf_index_array; + bool sdf_is_lines; + }; + + struct LightUniform { + float matrix[8]; //light to texture coordinate matrix + float shadow_matrix[8]; //light to shadow coordinate matrix + float color[4]; + + uint8_t shadow_color[4]; + uint32_t flags; //index to light texture + float shadow_pixel_size; + float height; + + float position[2]; + float shadow_z_far_inv; + float shadow_y_ofs; + + float atlas_rect[4]; + }; + + RID_Owner<OccluderPolygon> occluder_polygon_owner; + + enum ShadowRenderMode { + SHADOW_RENDER_MODE_SHADOW, + SHADOW_RENDER_MODE_SDF, + }; + + enum { + SHADOW_RENDER_SDF_TRIANGLES, + SHADOW_RENDER_SDF_LINES, + }; + + struct { + CanvasOcclusionShaderRD shader; + RID shader_version; + RID render_pipelines[3]; + RID sdf_render_pipelines[2]; + RD::VertexFormatID vertex_format; + RD::VertexFormatID sdf_vertex_format; + RD::FramebufferFormatID framebuffer_format; + RD::FramebufferFormatID sdf_framebuffer_format; + } shadow_render; + + /***************/ + /**** STATE ****/ + /***************/ + + //state that does not vary across rendering all items + + struct State { + //state buffer + struct Buffer { + float canvas_transform[16]; + float screen_transform[16]; + float canvas_normal_transform[16]; + float canvas_modulate[4]; + + float screen_pixel_size[2]; + float time; + uint32_t use_pixel_snap; + + float sdf_to_tex[4]; + float sdf_to_screen[2]; + float screen_to_sdf[2]; + + uint32_t directional_light_count; + float tex_to_sdf; + uint32_t pad1; + uint32_t pad2; + }; + + LightUniform *light_uniforms; + + RID lights_uniform_buffer; + RID canvas_state_buffer; + RID shadow_sampler; + RID shadow_texture; + RID shadow_depth_texture; + RID shadow_fb; + int shadow_texture_size = 2048; + + RID default_transforms_uniform_set; + + uint32_t max_lights_per_render; + uint32_t max_lights_per_item; + + double time; + + } state; + + struct PushConstant { + float world[6]; + uint32_t flags; + uint32_t specular_shininess; + union { + //rect + struct { + float modulation[4]; + float ninepatch_margins[4]; + float dst_rect[4]; + float src_rect[4]; + float pad[2]; + }; + //primitive + struct { + float points[6]; // vec2 points[3] + float uvs[6]; // vec2 points[3] + uint32_t colors[6]; // colors encoded as half + }; + }; + float color_texture_pixel_size[2]; + uint32_t lights[4]; + }; + + struct SkeletonUniform { + float skeleton_transform[16]; + float skeleton_inverse[16]; + }; + + Item *items[MAX_RENDER_ITEMS]; + + bool using_directional_lights = false; + RID default_canvas_texture; + + RID default_canvas_group_shader; + RID default_canvas_group_material; + + RS::CanvasItemTextureFilter default_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR; + RS::CanvasItemTextureRepeat default_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED; + + RID _create_base_uniform_set(RID p_to_render_target, bool p_backbuffer); + + inline void _bind_canvas_texture(RD::DrawListID p_draw_list, RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, RID &r_last_texture, PushConstant &push_constant, Size2 &r_texpixel_size); //recursive, so regular inline used instead. + void _render_item(RenderingDevice::DrawListID p_draw_list, const Item *p_item, RenderingDevice::FramebufferFormatID p_framebuffer_format, const Transform2D &p_canvas_transform_inverse, Item *¤t_clip, Light *p_lights, PipelineVariants *p_pipeline_variants); + void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool p_to_backbuffer = false); + + _FORCE_INLINE_ void _update_transform_2d_to_mat2x4(const Transform2D &p_transform, float *p_mat2x4); + _FORCE_INLINE_ void _update_transform_2d_to_mat2x3(const Transform2D &p_transform, float *p_mat2x3); + + _FORCE_INLINE_ void _update_transform_2d_to_mat4(const Transform2D &p_transform, float *p_mat4); + _FORCE_INLINE_ void _update_transform_to_mat4(const Transform &p_transform, float *p_mat4); + + void _update_shadow_atlas(); + +public: + PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()); + void free_polygon(PolygonID p_polygon); + + RID light_create(); + void light_set_texture(RID p_rid, RID p_texture); + void light_set_use_shadow(RID p_rid, bool p_enable); + void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders); + void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders); + + virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders); + + RID occluder_polygon_create(); + void occluder_polygon_set_shape(RID p_occluder, const Vector<Vector2> &p_points, bool p_closed); + void occluder_polygon_set_cull_mode(RID p_occluder, RS::CanvasOccluderPolygonCullMode p_mode); + + void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_light_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used); + + void canvas_debug_viewport_shadows(Light *p_lights_with_shadow) {} + + void draw_window_margins(int *p_margins, RID *p_margin_textures) {} + + virtual void set_shadow_texture_size(int p_size); + + void set_time(double p_time); + void update(); + bool free(RID p_rid); + RendererCanvasRenderRD(RendererStorageRD *p_storage); + ~RendererCanvasRenderRD(); +}; + +#endif // RASTERIZER_CANVAS_RD_H diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp new file mode 100644 index 0000000000..be2552bd32 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -0,0 +1,179 @@ +/*************************************************************************/ +/* renderer_compositor_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "renderer_compositor_rd.h" + +#include "core/config/project_settings.h" + +void RendererCompositorRD::prepare_for_blitting_render_targets() { + RD::get_singleton()->prepare_screen_for_drawing(); +} + +void RendererCompositorRD::blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount) { + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin_for_screen(p_screen); + + for (int i = 0; i < p_amount; i++) { + RID texture = storage->render_target_get_texture(p_render_targets[i].render_target); + ERR_CONTINUE(texture.is_null()); + RID rd_texture = storage->texture_get_rd_texture(texture); + ERR_CONTINUE(rd_texture.is_null()); + if (!render_target_descriptors.has(rd_texture) || !RD::get_singleton()->uniform_set_is_valid(render_target_descriptors[rd_texture])) { + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(copy_viewports_sampler); + u.ids.push_back(rd_texture); + uniforms.push_back(u); + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, copy_viewports_rd_shader, 0); + + render_target_descriptors[rd_texture] = uniform_set; + } + + Size2 screen_size(RD::get_singleton()->screen_get_width(p_screen), RD::get_singleton()->screen_get_height(p_screen)); + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, copy_viewports_rd_pipeline); + RD::get_singleton()->draw_list_bind_index_array(draw_list, copy_viewports_rd_array); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, render_target_descriptors[rd_texture], 0); + + float push_constant[4] = { + p_render_targets[i].rect.position.x / screen_size.width, + p_render_targets[i].rect.position.y / screen_size.height, + p_render_targets[i].rect.size.width / screen_size.width, + p_render_targets[i].rect.size.height / screen_size.height, + }; + RD::get_singleton()->draw_list_set_push_constant(draw_list, push_constant, 4 * sizeof(float)); + RD::get_singleton()->draw_list_draw(draw_list, true); + } + + RD::get_singleton()->draw_list_end(); +} + +void RendererCompositorRD::begin_frame(double frame_step) { + frame++; + delta = frame_step; + time += frame_step; + + double time_roll_over = GLOBAL_GET("rendering/limits/time/time_rollover_secs"); + time = Math::fmod(time, time_roll_over); + + canvas->set_time(time); + scene->set_time(time, frame_step); +} + +void RendererCompositorRD::end_frame(bool p_swap_buffers) { +#ifndef _MSC_VER +#warning TODO: likely pass a bool to swap buffers to avoid display? +#endif + RD::get_singleton()->swap_buffers(); //probably should pass some bool to avoid display? +} + +void RendererCompositorRD::initialize() { + { //create framebuffer copy shader + RenderingDevice::ShaderStageData vert; + vert.shader_stage = RenderingDevice::SHADER_STAGE_VERTEX; + vert.spir_v = RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_VERTEX, + "#version 450\n" + "layout(push_constant, binding = 0, std140) uniform Pos { vec4 dst_rect; } pos;\n" + "layout(location =0) out vec2 uv;\n" + "void main() { \n" + " vec2 base_arr[4] = vec2[](vec2(0.0,0.0),vec2(0.0,1.0),vec2(1.0,1.0),vec2(1.0,0.0));\n" + " uv = base_arr[gl_VertexIndex];\n" + " vec2 vtx = pos.dst_rect.xy+uv*pos.dst_rect.zw;\n" + " gl_Position = vec4(vtx * 2.0 - 1.0,0.0,1.0);\n" + "}\n"); + + RenderingDevice::ShaderStageData frag; + frag.shader_stage = RenderingDevice::SHADER_STAGE_FRAGMENT; + frag.spir_v = RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT, + "#version 450\n" + "layout (location = 0) in vec2 uv;\n" + "layout (location = 0) out vec4 color;\n" + "layout (binding = 0) uniform sampler2D src_rt;\n" + "void main() { color=texture(src_rt,uv); }\n"); + + Vector<RenderingDevice::ShaderStageData> source; + source.push_back(vert); + source.push_back(frag); + String error; + copy_viewports_rd_shader = RD::get_singleton()->shader_create(source); + if (!copy_viewports_rd_shader.is_valid()) { + print_line("Failed compilation: " + error); + } + } + + { //create index array for copy shader + Vector<uint8_t> pv; + pv.resize(6 * 4); + { + uint8_t *w = pv.ptrw(); + int *p32 = (int *)w; + p32[0] = 0; + p32[1] = 1; + p32[2] = 2; + p32[3] = 0; + p32[4] = 2; + p32[5] = 3; + } + copy_viewports_rd_index_buffer = RD::get_singleton()->index_buffer_create(6, RenderingDevice::INDEX_BUFFER_FORMAT_UINT32, pv); + copy_viewports_rd_array = RD::get_singleton()->index_array_create(copy_viewports_rd_index_buffer, 0, 6); + } + + { //pipeline + copy_viewports_rd_pipeline = RD::get_singleton()->render_pipeline_create(copy_viewports_rd_shader, RD::get_singleton()->screen_get_framebuffer_format(), RD::INVALID_ID, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RenderingDevice::PipelineColorBlendState::create_disabled(), 0); + } + { // sampler + copy_viewports_sampler = RD::get_singleton()->sampler_create(RD::SamplerState()); + } +} + +uint64_t RendererCompositorRD::frame = 1; + +void RendererCompositorRD::finalize() { + memdelete(scene); + memdelete(canvas); + memdelete(storage); + + //only need to erase these, the rest are erased by cascade + RD::get_singleton()->free(copy_viewports_rd_index_buffer); + RD::get_singleton()->free(copy_viewports_rd_shader); + RD::get_singleton()->free(copy_viewports_sampler); +} + +RendererCompositorRD *RendererCompositorRD::singleton = nullptr; + +RendererCompositorRD::RendererCompositorRD() { + singleton = this; + time = 0; + + storage = memnew(RendererStorageRD); + canvas = memnew(RendererCanvasRenderRD(storage)); + scene = memnew(RendererSceneRenderForward(storage)); +} diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h new file mode 100644 index 0000000000..cb85fc79e0 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -0,0 +1,97 @@ +/*************************************************************************/ +/* renderer_compositor_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef RENDERING_SERVER_COMPOSITOR_RD_H +#define RENDERING_SERVER_COMPOSITOR_RD_H + +#include "core/os/os.h" +#include "core/templates/thread_work_pool.h" +#include "servers/rendering/renderer_compositor.h" +#include "servers/rendering/renderer_rd/renderer_canvas_render_rd.h" +#include "servers/rendering/renderer_rd/renderer_scene_render_forward.h" +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" + +class RendererCompositorRD : public RendererCompositor { +protected: + RendererCanvasRenderRD *canvas; + RendererStorageRD *storage; + RendererSceneRenderForward *scene; + + RID copy_viewports_rd_shader; + RID copy_viewports_rd_pipeline; + RID copy_viewports_rd_index_buffer; + RID copy_viewports_rd_array; + RID copy_viewports_sampler; + + Map<RID, RID> render_target_descriptors; + + double time; + float delta; + + static uint64_t frame; + +public: + RendererStorage *get_storage() { return storage; } + RendererCanvasRender *get_canvas() { return canvas; } + RendererSceneRender *get_scene() { return scene; } + + void set_boot_image(const Ref<Image> &p_image, const Color &p_color, bool p_scale, bool p_use_filter) {} + + void initialize(); + void begin_frame(double frame_step); + void prepare_for_blitting_render_targets(); + void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount); + + void end_frame(bool p_swap_buffers); + void finalize(); + + _ALWAYS_INLINE_ uint64_t get_frame_number() const { return frame; } + _ALWAYS_INLINE_ float get_frame_delta_time() const { return delta; } + _ALWAYS_INLINE_ double get_total_time() const { return time; } + + static Error is_viable() { + return OK; + } + + static RendererCompositor *_create_current() { + return memnew(RendererCompositorRD); + } + + static void make_current() { + _create_func = _create_current; + } + + virtual bool is_low_end() const { return false; } + + static RendererCompositorRD *singleton; + RendererCompositorRD(); + ~RendererCompositorRD() {} +}; +#endif // RASTERIZER_RD_H diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp new file mode 100644 index 0000000000..a57dee7314 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -0,0 +1,3673 @@ +/*************************************************************************/ +/* renderer_scene_render_forward.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "renderer_scene_render_forward.h" +#include "core/config/project_settings.h" +#include "servers/rendering/rendering_device.h" +#include "servers/rendering/rendering_server_default.h" + +/* SCENE SHADER */ +void RendererSceneRenderForward::ShaderData::set_code(const String &p_code) { + //compile + + code = p_code; + valid = false; + ubo_size = 0; + uniforms.clear(); + uses_screen_texture = false; + + if (code == String()) { + return; //just invalid, but no error + } + + ShaderCompilerRD::GeneratedCode gen_code; + + int blend_mode = BLEND_MODE_MIX; + int depth_testi = DEPTH_TEST_ENABLED; + int alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF; + int cull = CULL_BACK; + + uses_point_size = false; + uses_alpha = false; + uses_blend_alpha = false; + uses_depth_pre_pass = false; + uses_discard = false; + uses_roughness = false; + uses_normal = false; + bool wireframe = false; + + unshaded = false; + uses_vertex = false; + uses_sss = false; + uses_transmittance = false; + uses_screen_texture = false; + uses_depth_texture = false; + uses_normal_texture = false; + uses_time = false; + writes_modelview_or_projection = false; + uses_world_coordinates = false; + + int depth_drawi = DEPTH_DRAW_OPAQUE; + + ShaderCompilerRD::IdentifierActions actions; + + actions.render_mode_values["blend_add"] = Pair<int *, int>(&blend_mode, BLEND_MODE_ADD); + actions.render_mode_values["blend_mix"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MIX); + actions.render_mode_values["blend_sub"] = Pair<int *, int>(&blend_mode, BLEND_MODE_SUB); + actions.render_mode_values["blend_mul"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MUL); + + actions.render_mode_values["alpha_to_coverage"] = Pair<int *, int>(&alpha_antialiasing_mode, ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE); + actions.render_mode_values["alpha_to_coverage_and_one"] = Pair<int *, int>(&alpha_antialiasing_mode, ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE); + + actions.render_mode_values["depth_draw_never"] = Pair<int *, int>(&depth_drawi, DEPTH_DRAW_DISABLED); + actions.render_mode_values["depth_draw_opaque"] = Pair<int *, int>(&depth_drawi, DEPTH_DRAW_OPAQUE); + actions.render_mode_values["depth_draw_always"] = Pair<int *, int>(&depth_drawi, DEPTH_DRAW_ALWAYS); + + actions.render_mode_values["depth_test_disabled"] = Pair<int *, int>(&depth_testi, DEPTH_TEST_DISABLED); + + actions.render_mode_values["cull_disabled"] = Pair<int *, int>(&cull, CULL_DISABLED); + actions.render_mode_values["cull_front"] = Pair<int *, int>(&cull, CULL_FRONT); + actions.render_mode_values["cull_back"] = Pair<int *, int>(&cull, CULL_BACK); + + actions.render_mode_flags["unshaded"] = &unshaded; + actions.render_mode_flags["wireframe"] = &wireframe; + + actions.usage_flag_pointers["ALPHA"] = &uses_alpha; + actions.render_mode_flags["depth_prepass_alpha"] = &uses_depth_pre_pass; + + actions.usage_flag_pointers["SSS_STRENGTH"] = &uses_sss; + actions.usage_flag_pointers["SSS_TRANSMITTANCE_DEPTH"] = &uses_transmittance; + + actions.usage_flag_pointers["SCREEN_TEXTURE"] = &uses_screen_texture; + actions.usage_flag_pointers["DEPTH_TEXTURE"] = &uses_depth_texture; + actions.usage_flag_pointers["NORMAL_TEXTURE"] = &uses_normal_texture; + actions.usage_flag_pointers["DISCARD"] = &uses_discard; + actions.usage_flag_pointers["TIME"] = &uses_time; + actions.usage_flag_pointers["ROUGHNESS"] = &uses_roughness; + actions.usage_flag_pointers["NORMAL"] = &uses_normal; + actions.usage_flag_pointers["NORMAL_MAP"] = &uses_normal; + + actions.usage_flag_pointers["POINT_SIZE"] = &uses_point_size; + actions.usage_flag_pointers["POINT_COORD"] = &uses_point_size; + + actions.write_flag_pointers["MODELVIEW_MATRIX"] = &writes_modelview_or_projection; + actions.write_flag_pointers["PROJECTION_MATRIX"] = &writes_modelview_or_projection; + actions.write_flag_pointers["VERTEX"] = &uses_vertex; + + actions.uniforms = &uniforms; + + RendererSceneRenderForward *scene_singleton = (RendererSceneRenderForward *)RendererSceneRenderForward::singleton; + + Error err = scene_singleton->shader.compiler.compile(RS::SHADER_SPATIAL, code, &actions, path, gen_code); + + ERR_FAIL_COND(err != OK); + + if (version.is_null()) { + version = scene_singleton->shader.scene_shader.version_create(); + } + + depth_draw = DepthDraw(depth_drawi); + depth_test = DepthTest(depth_testi); + +#if 0 + print_line("**compiling shader:"); + print_line("**defines:\n"); + for (int i = 0; i < gen_code.defines.size(); i++) { + print_line(gen_code.defines[i]); + } + print_line("\n**uniforms:\n" + gen_code.uniforms); + print_line("\n**vertex_globals:\n" + gen_code.vertex_global); + print_line("\n**vertex_code:\n" + gen_code.vertex); + print_line("\n**fragment_globals:\n" + gen_code.fragment_global); + print_line("\n**fragment_code:\n" + gen_code.fragment); + print_line("\n**light_code:\n" + gen_code.light); +#endif + scene_singleton->shader.scene_shader.version_set_code(version, gen_code.uniforms, gen_code.vertex_global, gen_code.vertex, gen_code.fragment_global, gen_code.light, gen_code.fragment, gen_code.defines); + ERR_FAIL_COND(!scene_singleton->shader.scene_shader.version_is_valid(version)); + + ubo_size = gen_code.uniform_total_size; + ubo_offsets = gen_code.uniform_offsets; + texture_uniforms = gen_code.texture_uniforms; + + //blend modes + + // if any form of Alpha Antialiasing is enabled, set the blend mode to alpha to coverage + if (alpha_antialiasing_mode != ALPHA_ANTIALIASING_OFF) { + blend_mode = BLEND_MODE_ALPHA_TO_COVERAGE; + } + + RD::PipelineColorBlendState::Attachment blend_attachment; + + switch (blend_mode) { + case BLEND_MODE_MIX: { + blend_attachment.enable_blend = true; + blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; + blend_attachment.color_blend_op = RD::BLEND_OP_ADD; + blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + + } break; + case BLEND_MODE_ADD: { + blend_attachment.enable_blend = true; + blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; + blend_attachment.color_blend_op = RD::BLEND_OP_ADD; + blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + uses_blend_alpha = true; //force alpha used because of blend + + } break; + case BLEND_MODE_SUB: { + blend_attachment.enable_blend = true; + blend_attachment.alpha_blend_op = RD::BLEND_OP_SUBTRACT; + blend_attachment.color_blend_op = RD::BLEND_OP_SUBTRACT; + blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + uses_blend_alpha = true; //force alpha used because of blend + + } break; + case BLEND_MODE_MUL: { + blend_attachment.enable_blend = true; + blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; + blend_attachment.color_blend_op = RD::BLEND_OP_ADD; + blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_DST_COLOR; + blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ZERO; + blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_DST_ALPHA; + blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; + uses_blend_alpha = true; //force alpha used because of blend + } break; + case BLEND_MODE_ALPHA_TO_COVERAGE: { + blend_attachment.enable_blend = true; + blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; + blend_attachment.color_blend_op = RD::BLEND_OP_ADD; + blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; + } + } + + RD::PipelineColorBlendState blend_state_blend; + blend_state_blend.attachments.push_back(blend_attachment); + RD::PipelineColorBlendState blend_state_opaque = RD::PipelineColorBlendState::create_disabled(1); + RD::PipelineColorBlendState blend_state_opaque_specular = RD::PipelineColorBlendState::create_disabled(2); + RD::PipelineColorBlendState blend_state_depth_normal_roughness = RD::PipelineColorBlendState::create_disabled(1); + RD::PipelineColorBlendState blend_state_depth_normal_roughness_giprobe = RD::PipelineColorBlendState::create_disabled(2); + + //update pipelines + + RD::PipelineDepthStencilState depth_stencil_state; + + if (depth_test != DEPTH_TEST_DISABLED) { + depth_stencil_state.enable_depth_test = true; + depth_stencil_state.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; + depth_stencil_state.enable_depth_write = depth_draw != DEPTH_DRAW_DISABLED ? true : false; + } + + for (int i = 0; i < CULL_VARIANT_MAX; i++) { + RD::PolygonCullMode cull_mode_rd_table[CULL_VARIANT_MAX][3] = { + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_FRONT, RD::POLYGON_CULL_BACK }, + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_BACK, RD::POLYGON_CULL_FRONT }, + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED } + }; + + RD::PolygonCullMode cull_mode_rd = cull_mode_rd_table[i][cull]; + + for (int j = 0; j < RS::PRIMITIVE_MAX; j++) { + RD::RenderPrimitive primitive_rd_table[RS::PRIMITIVE_MAX] = { + RD::RENDER_PRIMITIVE_POINTS, + RD::RENDER_PRIMITIVE_LINES, + RD::RENDER_PRIMITIVE_LINESTRIPS, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, + }; + + RD::RenderPrimitive primitive_rd = uses_point_size ? RD::RENDER_PRIMITIVE_POINTS : primitive_rd_table[j]; + + for (int k = 0; k < SHADER_VERSION_MAX; k++) { + if (!static_cast<RendererSceneRenderForward *>(singleton)->shader.scene_shader.is_variant_enabled(k)) { + continue; + } + RD::PipelineRasterizationState raster_state; + raster_state.cull_mode = cull_mode_rd; + raster_state.wireframe = wireframe; + + RD::PipelineColorBlendState blend_state; + RD::PipelineDepthStencilState depth_stencil = depth_stencil_state; + RD::PipelineMultisampleState multisample_state; + + if (uses_alpha || uses_blend_alpha) { + // only allow these flags to go through if we have some form of msaa + if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE) { + multisample_state.enable_alpha_to_coverage = true; + } else if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE) { + multisample_state.enable_alpha_to_coverage = true; + multisample_state.enable_alpha_to_one = true; + } + + if (k == SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS) { + blend_state = blend_state_blend; + if (depth_draw == DEPTH_DRAW_OPAQUE) { + depth_stencil.enable_depth_write = false; //alpha does not draw depth + } + } else if (uses_depth_pre_pass && (k == SHADER_VERSION_DEPTH_PASS || k == SHADER_VERSION_DEPTH_PASS_DP || k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS || k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL)) { + if (k == SHADER_VERSION_DEPTH_PASS || k == SHADER_VERSION_DEPTH_PASS_DP) { + //none, blend state contains nothing + } else if (k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) { + blend_state = RD::PipelineColorBlendState::create_disabled(5); //writes to normal and roughness in opaque way + } else { + blend_state = blend_state_opaque; //writes to normal and roughness in opaque way + } + } else { + pipelines[i][j][k].clear(); + continue; // do not use this version (will error if using it is attempted) + } + } else { + if (k == SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS) { + blend_state = blend_state_opaque; + } else if (k == SHADER_VERSION_DEPTH_PASS || k == SHADER_VERSION_DEPTH_PASS_DP) { + //none, leave empty + } else if (k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS) { + blend_state = blend_state_depth_normal_roughness; + } else if (k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE) { + blend_state = blend_state_depth_normal_roughness_giprobe; + } else if (k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) { + blend_state = RD::PipelineColorBlendState::create_disabled(5); //writes to normal and roughness in opaque way + } else if (k == SHADER_VERSION_DEPTH_PASS_WITH_SDF) { + blend_state = RD::PipelineColorBlendState(); //no color targets for SDF + } else { + //specular write + blend_state = blend_state_opaque_specular; + depth_stencil.enable_depth_test = false; + depth_stencil.enable_depth_write = false; + } + } + + RID shader_variant = scene_singleton->shader.scene_shader.version_get_shader(version, k); + pipelines[i][j][k].setup(shader_variant, primitive_rd, raster_state, multisample_state, depth_stencil, blend_state, 0); + } + } + } + + valid = true; +} + +void RendererSceneRenderForward::ShaderData::set_default_texture_param(const StringName &p_name, RID p_texture) { + if (!p_texture.is_valid()) { + default_texture_params.erase(p_name); + } else { + default_texture_params[p_name] = p_texture; + } +} + +void RendererSceneRenderForward::ShaderData::get_param_list(List<PropertyInfo> *p_param_list) const { + Map<int, StringName> order; + + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_LOCAL) { + continue; + } + + if (E->get().texture_order >= 0) { + order[E->get().texture_order + 100000] = E->key(); + } else { + order[E->get().order] = E->key(); + } + } + + for (Map<int, StringName>::Element *E = order.front(); E; E = E->next()) { + PropertyInfo pi = ShaderLanguage::uniform_to_property_info(uniforms[E->get()]); + pi.name = E->get(); + p_param_list->push_back(pi); + } +} + +void RendererSceneRenderForward::ShaderData::get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const { + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + RendererStorage::InstanceShaderParam p; + p.info = ShaderLanguage::uniform_to_property_info(E->get()); + p.info.name = E->key(); //supply name + p.index = E->get().instance_index; + p.default_value = ShaderLanguage::constant_value_to_variant(E->get().default_value, E->get().type, E->get().hint); + p_param_list->push_back(p); + } +} + +bool RendererSceneRenderForward::ShaderData::is_param_texture(const StringName &p_param) const { + if (!uniforms.has(p_param)) { + return false; + } + + return uniforms[p_param].texture_order >= 0; +} + +bool RendererSceneRenderForward::ShaderData::is_animated() const { + return false; +} + +bool RendererSceneRenderForward::ShaderData::casts_shadows() const { + return false; +} + +Variant RendererSceneRenderForward::ShaderData::get_default_parameter(const StringName &p_parameter) const { + if (uniforms.has(p_parameter)) { + ShaderLanguage::ShaderNode::Uniform uniform = uniforms[p_parameter]; + Vector<ShaderLanguage::ConstantNode::Value> default_value = uniform.default_value; + return ShaderLanguage::constant_value_to_variant(default_value, uniform.type, uniform.hint); + } + return Variant(); +} + +RS::ShaderNativeSourceCode RendererSceneRenderForward::ShaderData::get_native_source_code() const { + RendererSceneRenderForward *scene_singleton = (RendererSceneRenderForward *)RendererSceneRenderForward::singleton; + + return scene_singleton->shader.scene_shader.version_get_native_source_code(version); +} + +RendererSceneRenderForward::ShaderData::ShaderData() { + valid = false; + uses_screen_texture = false; +} + +RendererSceneRenderForward::ShaderData::~ShaderData() { + RendererSceneRenderForward *scene_singleton = (RendererSceneRenderForward *)RendererSceneRenderForward::singleton; + ERR_FAIL_COND(!scene_singleton); + //pipeline variants will clear themselves if shader is gone + if (version.is_valid()) { + scene_singleton->shader.scene_shader.version_free(version); + } +} + +RendererStorageRD::ShaderData *RendererSceneRenderForward::_create_shader_func() { + ShaderData *shader_data = memnew(ShaderData); + return shader_data; +} + +void RendererSceneRenderForward::MaterialData::set_render_priority(int p_priority) { + priority = p_priority - RS::MATERIAL_RENDER_PRIORITY_MIN; //8 bits +} + +void RendererSceneRenderForward::MaterialData::set_next_pass(RID p_pass) { + next_pass = p_pass; +} + +void RendererSceneRenderForward::MaterialData::update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { + RendererSceneRenderForward *scene_singleton = (RendererSceneRenderForward *)RendererSceneRenderForward::singleton; + + if ((uint32_t)ubo_data.size() != shader_data->ubo_size) { + p_uniform_dirty = true; + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + uniform_buffer = RID(); + } + + ubo_data.resize(shader_data->ubo_size); + if (ubo_data.size()) { + uniform_buffer = RD::get_singleton()->uniform_buffer_create(ubo_data.size()); + memset(ubo_data.ptrw(), 0, ubo_data.size()); //clear + } + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + //check whether buffer changed + if (p_uniform_dirty && ubo_data.size()) { + update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); + RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw(), RD::BARRIER_MASK_RASTER); + } + + uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); + + if ((uint32_t)texture_cache.size() != tex_uniform_count) { + texture_cache.resize(tex_uniform_count); + p_textures_dirty = true; + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + if (p_textures_dirty && tex_uniform_count) { + update_textures(p_parameters, shader_data->default_texture_params, shader_data->texture_uniforms, texture_cache.ptrw(), true); + } + + if (shader_data->ubo_size == 0 && shader_data->texture_uniforms.size() == 0) { + // This material does not require an uniform set, so don't create it. + return; + } + + if (!p_textures_dirty && uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + //no reason to update uniform set, only UBO (or nothing) was needed to update + return; + } + + Vector<RD::Uniform> uniforms; + + { + if (shader_data->ubo_size) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 0; + u.ids.push_back(uniform_buffer); + uniforms.push_back(u); + } + + const RID *textures = texture_cache.ptrw(); + for (uint32_t i = 0; i < tex_uniform_count; i++) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1 + i; + u.ids.push_back(textures[i]); + uniforms.push_back(u); + } + } + + uniform_set = RD::get_singleton()->uniform_set_create(uniforms, scene_singleton->shader.scene_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET); +} + +RendererSceneRenderForward::MaterialData::~MaterialData() { + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + } + + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + } +} + +RendererStorageRD::MaterialData *RendererSceneRenderForward::_create_material_func(ShaderData *p_shader) { + MaterialData *material_data = memnew(MaterialData); + material_data->shader_data = p_shader; + material_data->last_frame = false; + //update will happen later anyway so do nothing. + return material_data; +} + +RendererSceneRenderForward::RenderBufferDataForward::~RenderBufferDataForward() { + clear(); +} + +void RendererSceneRenderForward::RenderBufferDataForward::ensure_specular() { + if (!specular.is_valid()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = width; + tf.height = height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + if (msaa != RS::VIEWPORT_MSAA_DISABLED) { + tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + } else { + tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + + specular = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + if (msaa == RS::VIEWPORT_MSAA_DISABLED) { + { + Vector<RID> fb; + fb.push_back(color); + fb.push_back(specular); + fb.push_back(depth); + + color_specular_fb = RD::get_singleton()->framebuffer_create(fb); + } + { + Vector<RID> fb; + fb.push_back(specular); + + specular_only_fb = RD::get_singleton()->framebuffer_create(fb); + } + + } else { + tf.samples = texture_samples; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + specular_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + { + Vector<RID> fb; + fb.push_back(color_msaa); + fb.push_back(specular_msaa); + fb.push_back(depth_msaa); + + color_specular_fb = RD::get_singleton()->framebuffer_create(fb); + } + { + Vector<RID> fb; + fb.push_back(specular_msaa); + + specular_only_fb = RD::get_singleton()->framebuffer_create(fb); + } + } + } +} + +void RendererSceneRenderForward::RenderBufferDataForward::ensure_giprobe() { + if (!giprobe_buffer.is_valid()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8_UINT; + tf.width = width; + tf.height = height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + + if (msaa != RS::VIEWPORT_MSAA_DISABLED) { + RD::TextureFormat tf_aa = tf; + tf_aa.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + tf_aa.samples = texture_samples; + giprobe_buffer_msaa = RD::get_singleton()->texture_create(tf_aa, RD::TextureView()); + } else { + tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + + tf.usage_bits |= RD::TEXTURE_USAGE_STORAGE_BIT; + + giprobe_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + Vector<RID> fb; + if (msaa != RS::VIEWPORT_MSAA_DISABLED) { + fb.push_back(depth_msaa); + fb.push_back(normal_roughness_buffer_msaa); + fb.push_back(giprobe_buffer_msaa); + } else { + fb.push_back(depth); + fb.push_back(normal_roughness_buffer); + fb.push_back(giprobe_buffer); + } + + depth_normal_roughness_giprobe_fb = RD::get_singleton()->framebuffer_create(fb); + } +} + +void RendererSceneRenderForward::RenderBufferDataForward::clear() { + if (giprobe_buffer != RID()) { + RD::get_singleton()->free(giprobe_buffer); + giprobe_buffer = RID(); + + if (giprobe_buffer_msaa.is_valid()) { + RD::get_singleton()->free(giprobe_buffer_msaa); + giprobe_buffer_msaa = RID(); + } + + depth_normal_roughness_giprobe_fb = RID(); + } + + if (color_msaa.is_valid()) { + RD::get_singleton()->free(color_msaa); + color_msaa = RID(); + } + + if (depth_msaa.is_valid()) { + RD::get_singleton()->free(depth_msaa); + depth_msaa = RID(); + } + + if (specular.is_valid()) { + if (specular_msaa.is_valid()) { + RD::get_singleton()->free(specular_msaa); + specular_msaa = RID(); + } + RD::get_singleton()->free(specular); + specular = RID(); + } + + color = RID(); + depth = RID(); + color_specular_fb = RID(); + specular_only_fb = RID(); + color_fb = RID(); + depth_fb = RID(); + + if (normal_roughness_buffer.is_valid()) { + RD::get_singleton()->free(normal_roughness_buffer); + if (normal_roughness_buffer_msaa.is_valid()) { + RD::get_singleton()->free(normal_roughness_buffer_msaa); + normal_roughness_buffer_msaa = RID(); + } + normal_roughness_buffer = RID(); + depth_normal_roughness_fb = RID(); + } + + if (!render_sdfgi_uniform_set.is_null() && RD::get_singleton()->uniform_set_is_valid(render_sdfgi_uniform_set)) { + RD::get_singleton()->free(render_sdfgi_uniform_set); + } +} + +void RendererSceneRenderForward::RenderBufferDataForward::configure(RID p_color_buffer, RID p_depth_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa) { + clear(); + + msaa = p_msaa; + + width = p_width; + height = p_height; + + color = p_color_buffer; + depth = p_depth_buffer; + + if (p_msaa == RS::VIEWPORT_MSAA_DISABLED) { + { + Vector<RID> fb; + fb.push_back(p_color_buffer); + fb.push_back(depth); + + color_fb = RD::get_singleton()->framebuffer_create(fb); + } + { + Vector<RID> fb; + fb.push_back(depth); + + depth_fb = RD::get_singleton()->framebuffer_create(fb); + } + } else { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = p_width; + tf.height = p_height; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + + RD::TextureSamples ts[RS::VIEWPORT_MSAA_MAX] = { + RD::TEXTURE_SAMPLES_1, + RD::TEXTURE_SAMPLES_2, + RD::TEXTURE_SAMPLES_4, + RD::TEXTURE_SAMPLES_8, + RD::TEXTURE_SAMPLES_16 + }; + + texture_samples = ts[p_msaa]; + tf.samples = texture_samples; + + color_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D24_UNORM_S8_UINT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D24_UNORM_S8_UINT : RD::DATA_FORMAT_D32_SFLOAT_S8_UINT; + tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + + depth_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + { + Vector<RID> fb; + fb.push_back(color_msaa); + fb.push_back(depth_msaa); + + color_fb = RD::get_singleton()->framebuffer_create(fb); + } + { + Vector<RID> fb; + fb.push_back(depth_msaa); + + depth_fb = RD::get_singleton()->framebuffer_create(fb); + } + } +} + +void RendererSceneRenderForward::_allocate_normal_roughness_texture(RenderBufferDataForward *rb) { + if (rb->normal_roughness_buffer.is_valid()) { + return; + } + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tf.width = rb->width; + tf.height = rb->height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + + if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { + tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + } else { + tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + + rb->normal_roughness_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + if (rb->msaa == RS::VIEWPORT_MSAA_DISABLED) { + Vector<RID> fb; + fb.push_back(rb->depth); + fb.push_back(rb->normal_roughness_buffer); + rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb); + } else { + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + tf.samples = rb->texture_samples; + rb->normal_roughness_buffer_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + Vector<RID> fb; + fb.push_back(rb->depth_msaa); + fb.push_back(rb->normal_roughness_buffer_msaa); + rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb); + } + + _render_buffers_clear_uniform_set(rb); +} + +RendererSceneRenderRD::RenderBufferData *RendererSceneRenderForward::_create_render_buffer_data() { + return memnew(RenderBufferDataForward); +} + +bool RendererSceneRenderForward::free(RID p_rid) { + if (RendererSceneRenderRD::free(p_rid)) { + return true; + } + return false; +} + +/// RENDERING /// + +template <RendererSceneRenderForward::PassMode p_pass_mode> +void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) { + RD::DrawListID draw_list = p_draw_list; + RD::FramebufferFormatID framebuffer_format = p_framebuffer_Format; + + //global scope bindings + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, render_base_uniform_set, SCENE_UNIFORM_SET); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_params->render_pass_uniform_set, RENDER_PASS_UNIFORM_SET); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, default_vec4_xform_uniform_set, TRANSFORMS_UNIFORM_SET); + + RID prev_material_uniform_set; + + RID prev_vertex_array_rd; + RID prev_index_array_rd; + RID prev_pipeline_rd; + RID prev_xforms_uniform_set; + + bool shadow_pass = (p_params->pass_mode == PASS_MODE_SHADOW) || (p_params->pass_mode == PASS_MODE_SHADOW_DP); + + SceneState::PushConstant push_constant; + + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { + push_constant.uv_offset = Math::make_half_float(p_params->uv_offset.y) << 16; + push_constant.uv_offset |= Math::make_half_float(p_params->uv_offset.x); + } else { + push_constant.uv_offset = 0; + } + + for (uint32_t i = p_from_element; i < p_to_element; i++) { + const GeometryInstanceSurfaceDataCache *surf = p_params->elements[i]; + const RenderElementInfo &element_info = p_params->element_info[i]; + + push_constant.base_index = i + p_params->element_offset; + + RID material_uniform_set; + ShaderData *shader; + void *mesh_surface; + + if (shadow_pass || p_params->pass_mode == PASS_MODE_DEPTH) { //regular depth pass can use these too + material_uniform_set = surf->material_uniform_set_shadow; + shader = surf->shader_shadow; + mesh_surface = surf->surface_shadow; + + } else { + material_uniform_set = surf->material_uniform_set; + shader = surf->shader; + mesh_surface = surf->surface; + } + + if (!mesh_surface) { + continue; + } + + //find cull variant + ShaderData::CullVariant cull_variant; + + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL || p_params->pass_mode == PASS_MODE_SDF || ((p_params->pass_mode == PASS_MODE_SHADOW || p_params->pass_mode == PASS_MODE_SHADOW_DP) && surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS)) { + cull_variant = ShaderData::CULL_VARIANT_DOUBLE_SIDED; + } else { + bool mirror = surf->owner->mirror; + if (p_params->reverse_cull) { + mirror = !mirror; + } + cull_variant = mirror ? ShaderData::CULL_VARIANT_REVERSED : ShaderData::CULL_VARIANT_NORMAL; + } + + RS::PrimitiveType primitive = surf->primitive; + RID xforms_uniform_set = surf->owner->transforms_uniform_set; + + ShaderVersion shader_version = SHADER_VERSION_MAX; // Assigned to silence wrong -Wmaybe-initialized. + + switch (p_params->pass_mode) { + case PASS_MODE_COLOR: + case PASS_MODE_COLOR_TRANSPARENT: { + if (element_info.uses_lightmap) { + shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS; + } else if (element_info.uses_forward_gi) { + shader_version = SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI; + } else { + shader_version = SHADER_VERSION_COLOR_PASS; + } + } break; + case PASS_MODE_COLOR_SPECULAR: { + if (element_info.uses_lightmap) { + shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR; + } else { + shader_version = SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR; + } + } break; + case PASS_MODE_SHADOW: + case PASS_MODE_DEPTH: { + shader_version = SHADER_VERSION_DEPTH_PASS; + } break; + case PASS_MODE_SHADOW_DP: { + shader_version = SHADER_VERSION_DEPTH_PASS_DP; + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { + shader_version = SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS; + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: { + shader_version = SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE; + } break; + case PASS_MODE_DEPTH_MATERIAL: { + shader_version = SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL; + } break; + case PASS_MODE_SDF: { + shader_version = SHADER_VERSION_DEPTH_PASS_WITH_SDF; + } break; + } + + PipelineCacheRD *pipeline = nullptr; + + pipeline = &shader->pipelines[cull_variant][primitive][shader_version]; + + RD::VertexFormatID vertex_format = -1; + RID vertex_array_rd; + RID index_array_rd; + + //skeleton and blend shape + if (surf->owner->mesh_instance.is_valid()) { + storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); + } else { + storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); + } + + index_array_rd = storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); + + if (prev_vertex_array_rd != vertex_array_rd) { + RD::get_singleton()->draw_list_bind_vertex_array(draw_list, vertex_array_rd); + prev_vertex_array_rd = vertex_array_rd; + } + + if (prev_index_array_rd != index_array_rd) { + if (index_array_rd.is_valid()) { + RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array_rd); + } + prev_index_array_rd = index_array_rd; + } + + RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_params->force_wireframe); + + if (pipeline_rd != prev_pipeline_rd) { + // checking with prev shader does not make so much sense, as + // the pipeline may still be different. + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, pipeline_rd); + prev_pipeline_rd = pipeline_rd; + } + + if (xforms_uniform_set.is_valid() && prev_xforms_uniform_set != xforms_uniform_set) { + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, xforms_uniform_set, TRANSFORMS_UNIFORM_SET); + prev_xforms_uniform_set = xforms_uniform_set; + } + + if (material_uniform_set != prev_material_uniform_set) { + //update uniform set + if (material_uniform_set.is_valid()) { + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, material_uniform_set, MATERIAL_UNIFORM_SET); + } + + prev_material_uniform_set = material_uniform_set; + } + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant)); + + uint32_t instance_count = surf->owner->instance_count > 1 ? surf->owner->instance_count : element_info.repeat; + RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instance_count); + i += element_info.repeat - 1; //skip equal elements + } +} + +void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) { + //use template for faster performance (pass mode comparisons are inlined) + + switch (p_params->pass_mode) { + case PASS_MODE_COLOR: { + _render_list_template<PASS_MODE_COLOR>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_COLOR_SPECULAR: { + _render_list_template<PASS_MODE_COLOR_SPECULAR>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_COLOR_TRANSPARENT: { + _render_list_template<PASS_MODE_COLOR_TRANSPARENT>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_SHADOW: { + _render_list_template<PASS_MODE_SHADOW>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_SHADOW_DP: { + _render_list_template<PASS_MODE_SHADOW_DP>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH: { + _render_list_template<PASS_MODE_DEPTH>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { + _render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: { + _render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_DEPTH_MATERIAL: { + _render_list_template<PASS_MODE_DEPTH_MATERIAL>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + case PASS_MODE_SDF: { + _render_list_template<PASS_MODE_SDF>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); + } break; + } +} + +void RendererSceneRenderForward::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) { + uint32_t render_total = p_params->element_count; + uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count(); + uint32_t render_from = p_thread * render_total / total_threads; + uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads); + _render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to); +} + +void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) { + RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(p_framebuffer); + p_params->framebuffer_format = fb_format; + + if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time + //multi threaded + thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); + RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); + RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RendererSceneRenderForward::_render_list_thread_function, p_params); + RD::get_singleton()->draw_list_end(p_params->barrier); + } else { + //single threaded + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); + _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); + RD::get_singleton()->draw_list_end(p_params->barrier); + } +} + +void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows, int p_index) { + //CameraMatrix projection = p_cam_projection; + //projection.flip_y(); // Vulkan and modern APIs use Y-Down + CameraMatrix correction; + correction.set_depth_correction(p_flip_y); + CameraMatrix projection = correction * p_cam_projection; + + //store camera into ubo + RendererStorageRD::store_camera(projection, scene_state.ubo.projection_matrix); + RendererStorageRD::store_camera(projection.inverse(), scene_state.ubo.inv_projection_matrix); + RendererStorageRD::store_transform(p_cam_transform, scene_state.ubo.camera_matrix); + RendererStorageRD::store_transform(p_cam_transform.affine_inverse(), scene_state.ubo.inv_camera_matrix); + + scene_state.ubo.z_far = p_zfar; + scene_state.ubo.z_near = p_znear; + + scene_state.ubo.pancake_shadows = p_pancake_shadows; + + RendererStorageRD::store_soft_shadow_kernel(directional_penumbra_shadow_kernel_get(), scene_state.ubo.directional_penumbra_shadow_kernel); + RendererStorageRD::store_soft_shadow_kernel(directional_soft_shadow_kernel_get(), scene_state.ubo.directional_soft_shadow_kernel); + RendererStorageRD::store_soft_shadow_kernel(penumbra_shadow_kernel_get(), scene_state.ubo.penumbra_shadow_kernel); + RendererStorageRD::store_soft_shadow_kernel(soft_shadow_kernel_get(), scene_state.ubo.soft_shadow_kernel); + + scene_state.ubo.directional_penumbra_shadow_samples = directional_penumbra_shadow_samples_get(); + scene_state.ubo.directional_soft_shadow_samples = directional_soft_shadow_samples_get(); + scene_state.ubo.penumbra_shadow_samples = penumbra_shadow_samples_get(); + scene_state.ubo.soft_shadow_samples = soft_shadow_samples_get(); + + Size2 screen_pixel_size = Vector2(1.0, 1.0) / Size2(p_screen_size); + scene_state.ubo.screen_pixel_size[0] = screen_pixel_size.x; + scene_state.ubo.screen_pixel_size[1] = screen_pixel_size.y; + + scene_state.ubo.cluster_shift = get_shift_from_power_of_2(p_cluster_size); + scene_state.ubo.max_cluster_element_count_div_32 = p_max_cluster_elements / 32; + { + uint32_t cluster_screen_width = (p_screen_size.width - 1) / p_cluster_size + 1; + uint32_t cluster_screen_height = (p_screen_size.height - 1) / p_cluster_size + 1; + scene_state.ubo.cluster_type_size = cluster_screen_width * cluster_screen_height * (scene_state.ubo.max_cluster_element_count_div_32 + 32); + scene_state.ubo.cluster_width = cluster_screen_width; + } + + if (p_shadow_atlas.is_valid()) { + Vector2 sas = shadow_atlas_get_size(p_shadow_atlas); + scene_state.ubo.shadow_atlas_pixel_size[0] = 1.0 / sas.x; + scene_state.ubo.shadow_atlas_pixel_size[1] = 1.0 / sas.y; + } + { + Vector2 dss = directional_shadow_get_size(); + scene_state.ubo.directional_shadow_pixel_size[0] = 1.0 / dss.x; + scene_state.ubo.directional_shadow_pixel_size[1] = 1.0 / dss.y; + } + //time global variables + scene_state.ubo.time = time; + + scene_state.ubo.gi_upscale_for_msaa = false; + scene_state.ubo.volumetric_fog_enabled = false; + scene_state.ubo.fog_enabled = false; + + if (p_render_buffers.is_valid()) { + RenderBufferDataForward *render_buffers = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); + if (render_buffers->msaa != RS::VIEWPORT_MSAA_DISABLED) { + scene_state.ubo.gi_upscale_for_msaa = true; + } + + if (render_buffers_has_volumetric_fog(p_render_buffers)) { + scene_state.ubo.volumetric_fog_enabled = true; + float fog_end = render_buffers_get_volumetric_fog_end(p_render_buffers); + if (fog_end > 0.0) { + scene_state.ubo.volumetric_fog_inv_length = 1.0 / fog_end; + } else { + scene_state.ubo.volumetric_fog_inv_length = 1.0; + } + + float fog_detail_spread = render_buffers_get_volumetric_fog_detail_spread(p_render_buffers); //reverse lookup + if (fog_detail_spread > 0.0) { + scene_state.ubo.volumetric_fog_detail_spread = 1.0 / fog_detail_spread; + } else { + scene_state.ubo.volumetric_fog_detail_spread = 1.0; + } + } + } +#if 0 + if (p_render_buffers.is_valid() && render_buffers_is_sdfgi_enabled(p_render_buffers)) { + scene_state.ubo.sdfgi_cascade_count = render_buffers_get_sdfgi_cascade_count(p_render_buffers); + scene_state.ubo.sdfgi_probe_axis_size = render_buffers_get_sdfgi_cascade_probe_count(p_render_buffers); + scene_state.ubo.sdfgi_cascade_probe_size[0] = scene_state.ubo.sdfgi_probe_axis_size - 1; //float version for performance + scene_state.ubo.sdfgi_cascade_probe_size[1] = scene_state.ubo.sdfgi_probe_axis_size - 1; + scene_state.ubo.sdfgi_cascade_probe_size[2] = scene_state.ubo.sdfgi_probe_axis_size - 1; + + float csize = render_buffers_get_sdfgi_cascade_size(p_render_buffers); + scene_state.ubo.sdfgi_probe_to_uvw = 1.0 / float(scene_state.ubo.sdfgi_cascade_probe_size[0]); + float occ_bias = 0.0; + scene_state.ubo.sdfgi_occlusion_bias = occ_bias / csize; + scene_state.ubo.sdfgi_use_occlusion = render_buffers_is_sdfgi_using_occlusion(p_render_buffers); + scene_state.ubo.sdfgi_energy = render_buffers_get_sdfgi_energy(p_render_buffers); + + float cascade_voxel_size = (csize / scene_state.ubo.sdfgi_cascade_probe_size[0]); + float occlusion_clamp = (cascade_voxel_size - 0.5) / cascade_voxel_size; + scene_state.ubo.sdfgi_occlusion_clamp[0] = occlusion_clamp; + scene_state.ubo.sdfgi_occlusion_clamp[1] = occlusion_clamp; + scene_state.ubo.sdfgi_occlusion_clamp[2] = occlusion_clamp; + scene_state.ubo.sdfgi_normal_bias = (render_buffers_get_sdfgi_normal_bias(p_render_buffers) / csize) * scene_state.ubo.sdfgi_cascade_probe_size[0]; + + //vec2 tex_pixel_size = 1.0 / vec2(ivec2( (OCT_SIZE+2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE+2) * params.probe_axis_size ) ); + //vec3 probe_uv_offset = (ivec3(OCT_SIZE+2,OCT_SIZE+2,(OCT_SIZE+2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + uint32_t oct_size = sdfgi_get_lightprobe_octahedron_size(); + + scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0] = 1.0 / ((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size * scene_state.ubo.sdfgi_probe_axis_size); + scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[1] = 1.0 / ((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size); + scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[2] = 1.0; + + scene_state.ubo.sdfgi_probe_uv_offset[0] = float(oct_size + 2) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0]; + scene_state.ubo.sdfgi_probe_uv_offset[1] = float(oct_size + 2) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[1]; + scene_state.ubo.sdfgi_probe_uv_offset[2] = float((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0]; + + scene_state.ubo.sdfgi_occlusion_renormalize[0] = 0.5; + scene_state.ubo.sdfgi_occlusion_renormalize[1] = 1.0; + scene_state.ubo.sdfgi_occlusion_renormalize[2] = 1.0 / float(scene_state.ubo.sdfgi_cascade_count); + + for (uint32_t i = 0; i < scene_state.ubo.sdfgi_cascade_count; i++) { + SceneState::UBO::SDFGICascade &c = scene_state.ubo.sdfgi_cascades[i]; + Vector3 pos = render_buffers_get_sdfgi_cascade_offset(p_render_buffers, i); + pos -= p_cam_transform.origin; //make pos local to camera, to reduce numerical error + c.position[0] = pos.x; + c.position[1] = pos.y; + c.position[2] = pos.z; + c.to_probe = 1.0 / render_buffers_get_sdfgi_cascade_probe_size(p_render_buffers, i); + + Vector3i probe_ofs = render_buffers_get_sdfgi_cascade_probe_offset(p_render_buffers, i); + c.probe_world_offset[0] = probe_ofs.x; + c.probe_world_offset[1] = probe_ofs.y; + c.probe_world_offset[2] = probe_ofs.z; + } + } +#endif + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { + scene_state.ubo.use_ambient_light = true; + scene_state.ubo.ambient_light_color_energy[0] = 1; + scene_state.ubo.ambient_light_color_energy[1] = 1; + scene_state.ubo.ambient_light_color_energy[2] = 1; + scene_state.ubo.ambient_light_color_energy[3] = 1.0; + scene_state.ubo.use_ambient_cubemap = false; + scene_state.ubo.use_reflection_cubemap = false; + scene_state.ubo.ssao_enabled = false; + + } else if (is_environment(p_environment)) { + RS::EnvironmentBG env_bg = environment_get_background(p_environment); + RS::EnvironmentAmbientSource ambient_src = environment_get_ambient_source(p_environment); + + float bg_energy = environment_get_bg_energy(p_environment); + scene_state.ubo.ambient_light_color_energy[3] = bg_energy; + + scene_state.ubo.ambient_color_sky_mix = environment_get_ambient_sky_contribution(p_environment); + + //ambient + if (ambient_src == RS::ENV_AMBIENT_SOURCE_BG && (env_bg == RS::ENV_BG_CLEAR_COLOR || env_bg == RS::ENV_BG_COLOR)) { + Color color = env_bg == RS::ENV_BG_CLEAR_COLOR ? p_default_bg_color : environment_get_bg_color(p_environment); + color = color.to_linear(); + + scene_state.ubo.ambient_light_color_energy[0] = color.r * bg_energy; + scene_state.ubo.ambient_light_color_energy[1] = color.g * bg_energy; + scene_state.ubo.ambient_light_color_energy[2] = color.b * bg_energy; + scene_state.ubo.use_ambient_light = true; + scene_state.ubo.use_ambient_cubemap = false; + } else { + float energy = environment_get_ambient_light_energy(p_environment); + Color color = environment_get_ambient_light_color(p_environment); + color = color.to_linear(); + scene_state.ubo.ambient_light_color_energy[0] = color.r * energy; + scene_state.ubo.ambient_light_color_energy[1] = color.g * energy; + scene_state.ubo.ambient_light_color_energy[2] = color.b * energy; + + Basis sky_transform = environment_get_sky_orientation(p_environment); + sky_transform = sky_transform.inverse() * p_cam_transform.basis; + RendererStorageRD::store_transform_3x3(sky_transform, scene_state.ubo.radiance_inverse_xform); + + scene_state.ubo.use_ambient_cubemap = (ambient_src == RS::ENV_AMBIENT_SOURCE_BG && env_bg == RS::ENV_BG_SKY) || ambient_src == RS::ENV_AMBIENT_SOURCE_SKY; + scene_state.ubo.use_ambient_light = scene_state.ubo.use_ambient_cubemap || ambient_src == RS::ENV_AMBIENT_SOURCE_COLOR; + } + + //specular + RS::EnvironmentReflectionSource ref_src = environment_get_reflection_source(p_environment); + if ((ref_src == RS::ENV_REFLECTION_SOURCE_BG && env_bg == RS::ENV_BG_SKY) || ref_src == RS::ENV_REFLECTION_SOURCE_SKY) { + scene_state.ubo.use_reflection_cubemap = true; + } else { + scene_state.ubo.use_reflection_cubemap = false; + } + + scene_state.ubo.ssao_enabled = p_opaque_render_buffers && environment_is_ssao_enabled(p_environment); + scene_state.ubo.ssao_ao_affect = environment_get_ssao_ao_affect(p_environment); + scene_state.ubo.ssao_light_affect = environment_get_ssao_light_affect(p_environment); + + Color ao_color = environment_get_ao_color(p_environment).to_linear(); + scene_state.ubo.ao_color[0] = ao_color.r; + scene_state.ubo.ao_color[1] = ao_color.g; + scene_state.ubo.ao_color[2] = ao_color.b; + scene_state.ubo.ao_color[3] = ao_color.a; + + scene_state.ubo.fog_enabled = environment_is_fog_enabled(p_environment); + scene_state.ubo.fog_density = environment_get_fog_density(p_environment); + scene_state.ubo.fog_height = environment_get_fog_height(p_environment); + scene_state.ubo.fog_height_density = environment_get_fog_height_density(p_environment); + if (scene_state.ubo.fog_height_density >= 0.0001) { + scene_state.ubo.fog_height_density = 1.0 / scene_state.ubo.fog_height_density; + } + scene_state.ubo.fog_aerial_perspective = environment_get_fog_aerial_perspective(p_environment); + + Color fog_color = environment_get_fog_light_color(p_environment).to_linear(); + float fog_energy = environment_get_fog_light_energy(p_environment); + + scene_state.ubo.fog_light_color[0] = fog_color.r * fog_energy; + scene_state.ubo.fog_light_color[1] = fog_color.g * fog_energy; + scene_state.ubo.fog_light_color[2] = fog_color.b * fog_energy; + + scene_state.ubo.fog_sun_scatter = environment_get_fog_sun_scatter(p_environment); + + } else { + if (p_reflection_probe.is_valid() && storage->reflection_probe_is_interior(reflection_probe_instance_get_probe(p_reflection_probe))) { + scene_state.ubo.use_ambient_light = false; + } else { + scene_state.ubo.use_ambient_light = true; + Color clear_color = p_default_bg_color; + clear_color = clear_color.to_linear(); + scene_state.ubo.ambient_light_color_energy[0] = clear_color.r; + scene_state.ubo.ambient_light_color_energy[1] = clear_color.g; + scene_state.ubo.ambient_light_color_energy[2] = clear_color.b; + scene_state.ubo.ambient_light_color_energy[3] = 1.0; + } + + scene_state.ubo.use_ambient_cubemap = false; + scene_state.ubo.use_reflection_cubemap = false; + scene_state.ubo.ssao_enabled = false; + } + + scene_state.ubo.roughness_limiter_enabled = p_opaque_render_buffers && screen_space_roughness_limiter_is_active(); + scene_state.ubo.roughness_limiter_amount = screen_space_roughness_limiter_get_amount(); + scene_state.ubo.roughness_limiter_limit = screen_space_roughness_limiter_get_limit(); + + if (p_index >= (int)scene_state.uniform_buffers.size()) { + uint32_t from = scene_state.uniform_buffers.size(); + scene_state.uniform_buffers.resize(p_index + 1); + render_pass_uniform_sets.resize(p_index + 1); + for (uint32_t i = from; i < scene_state.uniform_buffers.size(); i++) { + scene_state.uniform_buffers[i] = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); + } + } + RD::get_singleton()->buffer_update(scene_state.uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo, RD::BARRIER_MASK_RASTER); +} + +void RendererSceneRenderForward::_update_instance_data_buffer(RenderListType p_render_list) { + if (scene_state.instance_data[p_render_list].size() > 0) { + if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) { + if (scene_state.instance_buffer[p_render_list] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[p_render_list]); + } + uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size())); + scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData)); + scene_state.instance_buffer_size[p_render_list] = new_size; + } + RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr(), RD::BARRIER_MASK_RASTER); + } +} +void RendererSceneRenderForward::_fill_instance_data(RenderListType p_render_list, uint32_t p_offset, int32_t p_max_elements, bool p_update_buffer) { + RenderList *rl = &render_list[p_render_list]; + uint32_t element_total = p_max_elements >= 0 ? uint32_t(p_max_elements) : rl->elements.size(); + + scene_state.instance_data[p_render_list].resize(p_offset + element_total); + rl->element_info.resize(p_offset + element_total); + + uint32_t repeats = 0; + GeometryInstanceSurfaceDataCache *prev_surface = nullptr; + for (uint32_t i = 0; i < element_total; i++) { + GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset]; + GeometryInstanceForward *inst = surface->owner; + + SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset]; + + if (inst->store_transform_cache) { + RendererStorageRD::store_transform(inst->transform, instance_data.transform); + } else { + RendererStorageRD::store_transform(Transform(), instance_data.transform); + } + + instance_data.flags = inst->flags_cache; + instance_data.gi_offset = inst->gi_offset_cache; + instance_data.layer_mask = inst->layer_mask; + instance_data.instance_uniforms_ofs = uint32_t(inst->shader_parameters_offset); + instance_data.lightmap_uv_scale[0] = inst->lightmap_uv_scale.position.x; + instance_data.lightmap_uv_scale[1] = inst->lightmap_uv_scale.position.y; + instance_data.lightmap_uv_scale[2] = inst->lightmap_uv_scale.size.x; + instance_data.lightmap_uv_scale[3] = inst->lightmap_uv_scale.size.y; + + bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid(); + + if (prev_surface != nullptr && !cant_repeat && prev_surface->sort.sort_key1 == surface->sort.sort_key1 && prev_surface->sort.sort_key2 == surface->sort.sort_key2) { + //this element is the same as the previous one, count repeats to draw it using instancing + repeats++; + } else { + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + i - j].repeat = j; + } + } + repeats = 1; + } + + RenderElementInfo &element_info = rl->element_info[p_offset + i]; + + element_info.lod_index = surface->sort.lod_index; + element_info.uses_forward_gi = surface->sort.uses_forward_gi; + element_info.uses_lightmap = surface->sort.uses_lightmap; + + if (cant_repeat) { + prev_surface = nullptr; + } else { + prev_surface = surface; + } + } + + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + element_total - j].repeat = j; + } + } + + if (p_update_buffer) { + _update_instance_data_buffer(p_render_list); + } +} + +void RendererSceneRenderForward::_fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi, bool p_using_opaque_gi, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_append) { + if (p_render_list == RENDER_LIST_OPAQUE) { + scene_state.used_sss = false; + scene_state.used_screen_texture = false; + scene_state.used_normal_texture = false; + scene_state.used_depth_texture = false; + } + uint32_t lightmap_captures_used = 0; + + Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); + near_plane.d += p_cam_projection.get_z_near(); + float z_max = p_cam_projection.get_z_far() - p_cam_projection.get_z_near(); + + RenderList *rl = &render_list[p_render_list]; + _update_dirty_geometry_instances(); + + if (!p_append) { + rl->clear(); + if (p_render_list == RENDER_LIST_OPAQUE) { + render_list[RENDER_LIST_ALPHA].clear(); //opaque fills alpha too + } + } + + //fill list + + for (int i = 0; i < (int)p_instances.size(); i++) { + GeometryInstanceForward *inst = static_cast<GeometryInstanceForward *>(p_instances[i]); + + Vector3 support_min = inst->transformed_aabb.get_support(-near_plane.normal); + inst->depth = near_plane.distance_to(support_min); + uint32_t depth_layer = CLAMP(int(inst->depth * 16 / z_max), 0, 15); + + uint32_t flags = inst->base_flags; //fill flags if appropriate + + bool uses_lightmap = false; + bool uses_gi = false; + + if (p_render_list == RENDER_LIST_OPAQUE) { + //setup GI + + if (inst->lightmap_instance.is_valid()) { + int32_t lightmap_cull_index = -1; + for (uint32_t j = 0; j < scene_state.lightmaps_used; j++) { + if (scene_state.lightmap_ids[j] == inst->lightmap_instance) { + lightmap_cull_index = j; + break; + } + } + if (lightmap_cull_index >= 0) { + inst->gi_offset_cache = inst->lightmap_slice_index << 16; + inst->gi_offset_cache |= lightmap_cull_index; + flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP; + if (scene_state.lightmap_has_sh[lightmap_cull_index]) { + flags |= INSTANCE_DATA_FLAG_USE_SH_LIGHTMAP; + } + uses_lightmap = true; + } else { + inst->gi_offset_cache = 0xFFFFFFFF; + } + + } else if (inst->lightmap_sh) { + if (lightmap_captures_used < scene_state.max_lightmap_captures) { + const Color *src_capture = inst->lightmap_sh->sh; + LightmapCaptureData &lcd = scene_state.lightmap_captures[lightmap_captures_used]; + for (int j = 0; j < 9; j++) { + lcd.sh[j * 4 + 0] = src_capture[j].r; + lcd.sh[j * 4 + 1] = src_capture[j].g; + lcd.sh[j * 4 + 2] = src_capture[j].b; + lcd.sh[j * 4 + 3] = src_capture[j].a; + } + flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE; + inst->gi_offset_cache = lightmap_captures_used; + lightmap_captures_used++; + uses_lightmap = true; + } + + } else if (!low_end) { + if (p_using_opaque_gi) { + flags |= INSTANCE_DATA_FLAG_USE_GI_BUFFERS; + } + + if (inst->gi_probes[0].is_valid()) { + uint32_t probe0_index = 0xFFFF; + uint32_t probe1_index = 0xFFFF; + + for (uint32_t j = 0; j < scene_state.giprobes_used; j++) { + if (scene_state.giprobe_ids[j] == inst->gi_probes[0]) { + probe0_index = j; + } else if (scene_state.giprobe_ids[j] == inst->gi_probes[1]) { + probe1_index = j; + } + } + + if (probe0_index == 0xFFFF && probe1_index != 0xFFFF) { + //0 must always exist if a probe exists + SWAP(probe0_index, probe1_index); + } + + inst->gi_offset_cache = probe0_index | (probe1_index << 16); + flags |= INSTANCE_DATA_FLAG_USE_GIPROBE; + uses_gi = true; + } else { + if (p_using_sdfgi && inst->can_sdfgi) { + flags |= INSTANCE_DATA_FLAG_USE_SDFGI; + uses_gi = true; + } + inst->gi_offset_cache = 0xFFFFFFFF; + } + } + } + inst->flags_cache = flags; + + GeometryInstanceSurfaceDataCache *surf = inst->surface_caches; + + while (surf) { + surf->sort.uses_forward_gi = 0; + surf->sort.uses_lightmap = 0; + + // LOD + + if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(surf->surface)) { + //lod + Vector3 lod_support_min = inst->transformed_aabb.get_support(-p_lod_plane.normal); + Vector3 lod_support_max = inst->transformed_aabb.get_support(p_lod_plane.normal); + + float distance_min = p_lod_plane.distance_to(lod_support_min); + float distance_max = p_lod_plane.distance_to(lod_support_max); + + float distance = 0.0; + + if (distance_min * distance_max < 0.0) { + //crossing plane + distance = 0.0; + } else if (distance_min >= 0.0) { + distance = distance_min; + } else if (distance_max <= 0.0) { + distance = -distance_max; + } + + surf->sort.lod_index = storage->mesh_surface_get_lod(surf->surface, inst->lod_model_scale * inst->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold); + } else { + surf->sort.lod_index = 0; + } + + // ADD Element + if (p_pass_mode == PASS_MODE_COLOR) { + if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { + rl->add_element(surf); + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA) { + render_list[RENDER_LIST_ALPHA].add_element(surf); + if (uses_gi) { + surf->sort.uses_forward_gi = 1; + } + } + + if (uses_lightmap) { + surf->sort.uses_lightmap = 1; + } + + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_SUBSURFACE_SCATTERING) { + scene_state.used_sss = true; + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_SCREEN_TEXTURE) { + scene_state.used_screen_texture = true; + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_NORMAL_TEXTURE) { + scene_state.used_normal_texture = true; + } + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DEPTH_TEXTURE) { + scene_state.used_depth_texture = true; + } + + } else if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) { + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW) { + rl->add_element(surf); + } + } else { + if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { + rl->add_element(surf); + } + } + + surf->sort.depth_layer = depth_layer; + + surf = surf->next; + } + } + + if (p_render_list == RENDER_LIST_OPAQUE && lightmap_captures_used) { + RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures, RD::BARRIER_MASK_RASTER); + } +} + +void RendererSceneRenderForward::_setup_giprobes(const PagedArray<RID> &p_giprobes) { + scene_state.giprobes_used = MIN(p_giprobes.size(), uint32_t(MAX_GI_PROBES)); + for (uint32_t i = 0; i < scene_state.giprobes_used; i++) { + scene_state.giprobe_ids[i] = p_giprobes[i]; + } +} + +void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform) { + scene_state.lightmaps_used = 0; + for (int i = 0; i < (int)p_lightmaps.size(); i++) { + if (i >= (int)scene_state.max_lightmaps) { + break; + } + + RID lightmap = lightmap_instance_get_lightmap(p_lightmaps[i]); + + Basis to_lm = lightmap_instance_get_transform(p_lightmaps[i]).basis.inverse() * p_cam_transform.basis; + to_lm = to_lm.inverse().transposed(); //will transform normals + RendererStorageRD::store_transform_3x3(to_lm, scene_state.lightmaps[i].normal_xform); + scene_state.lightmap_ids[i] = p_lightmaps[i]; + scene_state.lightmap_has_sh[i] = storage->lightmap_uses_spherical_harmonics(lightmap); + + scene_state.lightmaps_used++; + } + if (scene_state.lightmaps_used > 0) { + RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps, RD::BARRIER_MASK_RASTER); + } +} + +void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { + RenderBufferDataForward *render_buffer = nullptr; + if (p_render_buffer.is_valid()) { + render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer); + } + + //first of all, make a new render pass + //fill up ubo + + RENDER_TIMESTAMP("Setup 3D Scene"); + + float lod_distance_multiplier = p_cam_projection.get_lod_multiplier(); + Plane lod_camera_plane(p_cam_transform.get_origin(), -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { + p_screen_lod_threshold = 0.0; + } + + //scene_state.ubo.subsurface_scatter_width = subsurface_scatter_size; + + Vector2 vp_he = p_cam_projection.get_viewport_half_extents(); + scene_state.ubo.viewport_size[0] = vp_he.x; + scene_state.ubo.viewport_size[1] = vp_he.y; + scene_state.ubo.directional_light_count = 0; + + Size2i screen_size; + RID opaque_framebuffer; + RID opaque_specular_framebuffer; + RID depth_framebuffer; + RID alpha_framebuffer; + + PassMode depth_pass_mode = PASS_MODE_DEPTH; + Vector<Color> depth_pass_clear; + bool using_separate_specular = false; + bool using_ssr = false; + bool using_sdfgi = false; + bool using_giprobe = false; + + if (render_buffer) { + screen_size.x = render_buffer->width; + screen_size.y = render_buffer->height; + + opaque_framebuffer = render_buffer->color_fb; + + if (!low_end && p_gi_probes.size() > 0) { + using_giprobe = true; + } + + if (!p_environment.is_valid() && using_giprobe) { + depth_pass_mode = PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE; + + } else if (p_environment.is_valid() && (environment_is_ssr_enabled(p_environment) || environment_is_sdfgi_enabled(p_environment) || using_giprobe)) { + if (environment_is_sdfgi_enabled(p_environment)) { + depth_pass_mode = using_giprobe ? PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE : PASS_MODE_DEPTH_NORMAL_ROUGHNESS; // also giprobe + using_sdfgi = true; + } else { + depth_pass_mode = using_giprobe ? PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE : PASS_MODE_DEPTH_NORMAL_ROUGHNESS; + } + + if (environment_is_ssr_enabled(p_environment)) { + render_buffer->ensure_specular(); + using_separate_specular = true; + using_ssr = true; + opaque_specular_framebuffer = render_buffer->color_specular_fb; + } + + } else if (p_environment.is_valid() && (environment_is_ssao_enabled(p_environment) || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER)) { + depth_pass_mode = PASS_MODE_DEPTH_NORMAL_ROUGHNESS; + } + + switch (depth_pass_mode) { + case PASS_MODE_DEPTH: { + depth_framebuffer = render_buffer->depth_fb; + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { + _allocate_normal_roughness_texture(render_buffer); + depth_framebuffer = render_buffer->depth_normal_roughness_fb; + depth_pass_clear.push_back(Color(0.5, 0.5, 0.5, 0)); + } break; + case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: { + _allocate_normal_roughness_texture(render_buffer); + render_buffer->ensure_giprobe(); + depth_framebuffer = render_buffer->depth_normal_roughness_giprobe_fb; + depth_pass_clear.push_back(Color(0.5, 0.5, 0.5, 0)); + depth_pass_clear.push_back(Color(0, 0, 0, 0)); + } break; + default: { + }; + } + + alpha_framebuffer = opaque_framebuffer; + } else if (p_reflection_probe.is_valid()) { + uint32_t resolution = reflection_probe_instance_get_resolution(p_reflection_probe); + screen_size.x = resolution; + screen_size.y = resolution; + + opaque_framebuffer = reflection_probe_instance_get_framebuffer(p_reflection_probe, p_reflection_probe_pass); + depth_framebuffer = reflection_probe_instance_get_depth_framebuffer(p_reflection_probe, p_reflection_probe_pass); + alpha_framebuffer = opaque_framebuffer; + + if (storage->reflection_probe_is_interior(reflection_probe_instance_get_probe(p_reflection_probe))) { + p_environment = RID(); //no environment on interiors + } + } else { + ERR_FAIL(); //bug? + } + + RD::get_singleton()->draw_command_begin_label("Render Setup"); + + _setup_lightmaps(p_lightmaps, p_cam_transform); + _setup_giprobes(p_gi_probes); + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + + _update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example) + + _fill_render_list(RENDER_LIST_OPAQUE, p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi, using_sdfgi || using_giprobe, lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + render_list[RENDER_LIST_OPAQUE].sort_by_key(); + render_list[RENDER_LIST_ALPHA].sort_by_depth(); + _fill_instance_data(RENDER_LIST_OPAQUE); + _fill_instance_data(RENDER_LIST_ALPHA); + + RD::get_singleton()->draw_command_end_label(); + + bool using_sss = !low_end && render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED; + + if (using_sss) { + using_separate_specular = true; + render_buffer->ensure_specular(); + using_separate_specular = true; + opaque_specular_framebuffer = render_buffer->color_specular_fb; + } + RID radiance_texture; + bool draw_sky = false; + bool draw_sky_fog_only = false; + + Color clear_color; + bool keep_color = false; + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_OVERDRAW) { + clear_color = Color(0, 0, 0, 1); //in overdraw mode, BG should always be black + } else if (is_environment(p_environment)) { + RS::EnvironmentBG bg_mode = environment_get_background(p_environment); + float bg_energy = environment_get_bg_energy(p_environment); + switch (bg_mode) { + case RS::ENV_BG_CLEAR_COLOR: { + clear_color = p_default_bg_color; + clear_color.r *= bg_energy; + clear_color.g *= bg_energy; + clear_color.b *= bg_energy; + if (render_buffers_has_volumetric_fog(p_render_buffer) || environment_is_fog_enabled(p_environment)) { + draw_sky_fog_only = true; + storage->material_set_param(sky_scene_state.fog_material, "clear_color", Variant(clear_color.to_linear())); + } + } break; + case RS::ENV_BG_COLOR: { + clear_color = environment_get_bg_color(p_environment); + clear_color.r *= bg_energy; + clear_color.g *= bg_energy; + clear_color.b *= bg_energy; + if (render_buffers_has_volumetric_fog(p_render_buffer) || environment_is_fog_enabled(p_environment)) { + draw_sky_fog_only = true; + storage->material_set_param(sky_scene_state.fog_material, "clear_color", Variant(clear_color.to_linear())); + } + } break; + case RS::ENV_BG_SKY: { + draw_sky = true; + } break; + case RS::ENV_BG_CANVAS: { + keep_color = true; + } break; + case RS::ENV_BG_KEEP: { + keep_color = true; + } break; + case RS::ENV_BG_CAMERA_FEED: { + } break; + default: { + } + } + // setup sky if used for ambient, reflections, or background + if (draw_sky || draw_sky_fog_only || environment_get_reflection_source(p_environment) == RS::ENV_REFLECTION_SOURCE_SKY || environment_get_ambient_source(p_environment) == RS::ENV_AMBIENT_SOURCE_SKY) { + RENDER_TIMESTAMP("Setup Sky"); + RD::get_singleton()->draw_command_begin_label("Setup Sky"); + CameraMatrix projection = p_cam_projection; + if (p_reflection_probe.is_valid()) { + CameraMatrix correction; + correction.set_depth_correction(true); + projection = correction * p_cam_projection; + } + + _setup_sky(p_environment, p_render_buffer, projection, p_cam_transform, screen_size); + + RID sky = environment_get_sky(p_environment); + if (sky.is_valid()) { + _update_sky(p_environment, projection, p_cam_transform); + radiance_texture = sky_get_radiance_texture_rd(sky); + } else { + // do not try to draw sky if invalid + draw_sky = false; + } + RD::get_singleton()->draw_command_end_label(); + } + } else { + clear_color = p_default_bg_color; + } + + bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION; + bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES; + bool depth_pre_pass = !low_end && depth_framebuffer.is_valid(); + + bool using_ssao = depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment); + bool continue_depth = false; + if (depth_pre_pass) { //depth pre pass + + bool needs_pre_resolve = _needs_post_prepass_render(using_sdfgi || using_giprobe); + if (needs_pre_resolve) { + RENDER_TIMESTAMP("GI + Render Depth Pre-Pass (parallel)"); + } else { + RENDER_TIMESTAMP("Render Depth Pre-Pass"); + } + if (needs_pre_resolve) { + //pre clear the depth framebuffer, as AMD (and maybe others?) use compute for it, and barrier other compute shaders. + RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, depth_pass_clear); + RD::get_singleton()->draw_list_end(); + //start compute processes here, so they run at the same time as depth pre-pass + _post_prepass_render(using_sdfgi || using_giprobe); + } + + RD::get_singleton()->draw_command_begin_label("Render Depth Pre-Pass"); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + + bool finish_depth = using_ssao || using_sdfgi || using_giprobe; + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, depth_framebuffer, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, needs_pre_resolve ? Vector<Color>() : depth_pass_clear); + + RD::get_singleton()->draw_command_end_label(); + + if (needs_pre_resolve) { + _pre_resolve_render(using_sdfgi || using_giprobe); + } + + if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { + RENDER_TIMESTAMP("Resolve Depth Pre-Pass"); + RD::get_singleton()->draw_command_begin_label("Resolve Depth Pre-Pass"); + if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE) { + if (needs_pre_resolve) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, RD::BARRIER_MASK_COMPUTE); + } + static int texture_samples[RS::VIEWPORT_MSAA_MAX] = { 1, 2, 4, 8, 16 }; + storage->get_effects()->resolve_gi(render_buffer->depth_msaa, render_buffer->normal_roughness_buffer_msaa, using_giprobe ? render_buffer->giprobe_buffer_msaa : RID(), render_buffer->depth, render_buffer->normal_roughness_buffer, using_giprobe ? render_buffer->giprobe_buffer : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_samples[render_buffer->msaa]); + } else if (finish_depth) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); + } + RD::get_singleton()->draw_command_end_label(); + } + + continue_depth = !finish_depth; + } + + _pre_opaque_render(using_ssao, using_sdfgi || using_giprobe, render_buffer ? render_buffer->normal_roughness_buffer : RID(), render_buffer ? render_buffer->giprobe_buffer : RID()); + + RD::get_singleton()->draw_command_begin_label("Render Opaque Pass"); + + scene_state.ubo.directional_light_count = _get_render_state_directional_light_count(); + + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); + + RENDER_TIMESTAMP("Render Opaque Pass"); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); + + bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss; + bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss; + + { + bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only || debug_giprobes || debug_sdfgi_probes); + bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only || debug_giprobes || debug_sdfgi_probes); + + //regular forward for now + Vector<Color> c; + if (using_separate_specular) { + Color cc = clear_color.to_linear(); + cc.a = 0; //subsurf scatter must be 0 + c.push_back(cc); + c.push_back(Color(0, 0, 0, 0)); + } else { + c.push_back(clear_color.to_linear()); + } + + RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer; + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); + if (will_continue_color && using_separate_specular) { + // close the specular framebuffer, as it's no longer used + RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_end(); + } + } + + RD::get_singleton()->draw_command_end_label(); + + if (debug_giprobes) { + //debug giprobes + bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); + bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only); + + CameraMatrix dc; + dc.set_depth_correction(true); + CameraMatrix cm = (dc * p_cam_projection) * CameraMatrix(p_cam_transform.affine_inverse()); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(opaque_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::get_singleton()->draw_command_begin_label("Debug GIProbes"); + for (int i = 0; i < (int)p_gi_probes.size(); i++) { + _debug_giprobe(p_gi_probes[i], draw_list, opaque_framebuffer, cm, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION, 1.0); + } + RD::get_singleton()->draw_command_end_label(); + RD::get_singleton()->draw_list_end(); + } + + if (debug_sdfgi_probes) { + //debug giprobes + bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); + bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only); + + CameraMatrix dc; + dc.set_depth_correction(true); + CameraMatrix cm = (dc * p_cam_projection) * CameraMatrix(p_cam_transform.affine_inverse()); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(opaque_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::get_singleton()->draw_command_begin_label("Debug SDFGI"); + _debug_sdfgi_probes(p_render_buffer, draw_list, opaque_framebuffer, cm); + RD::get_singleton()->draw_command_end_label(); + RD::get_singleton()->draw_list_end(); + } + + if (draw_sky || draw_sky_fog_only) { + RENDER_TIMESTAMP("Render Sky"); + + CameraMatrix projection = p_cam_projection; + if (p_reflection_probe.is_valid()) { + CameraMatrix correction; + correction.set_depth_correction(true); + projection = correction * p_cam_projection; + } + RD::get_singleton()->draw_command_begin_label("Draw Sky"); + _draw_sky(can_continue_color, can_continue_depth, opaque_framebuffer, p_environment, projection, p_cam_transform); + RD::get_singleton()->draw_command_end_label(); + } + + if (render_buffer && !can_continue_color && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); + if (using_separate_specular) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa, render_buffer->specular); + } + } + + if (render_buffer && !can_continue_depth && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); + } + + if (using_separate_specular) { + if (using_sss) { + RENDER_TIMESTAMP("Sub Surface Scattering"); + RD::get_singleton()->draw_command_begin_label("Process Sub Surface Scattering"); + _process_sss(p_render_buffer, p_cam_projection); + RD::get_singleton()->draw_command_end_label(); + } + + if (using_ssr) { + RENDER_TIMESTAMP("Screen Space Reflection"); + RD::get_singleton()->draw_command_begin_label("Process Screen Space Reflections"); + _process_ssr(p_render_buffer, render_buffer->color_fb, render_buffer->normal_roughness_buffer, render_buffer->specular, render_buffer->specular, Color(0, 0, 0, 1), p_environment, p_cam_projection, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED); + RD::get_singleton()->draw_command_end_label(); + } else { + //just mix specular back + RENDER_TIMESTAMP("Merge Specular"); + storage->get_effects()->merge_specular(render_buffer->color_fb, render_buffer->specular, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED ? RID() : render_buffer->color, RID()); + } + } + + RENDER_TIMESTAMP("Render Transparent Pass"); + + RD::get_singleton()->draw_command_begin_label("Render Transparent Pass"); + + rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_ALPHA, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); + + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + + { + RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); + } + + RD::get_singleton()->draw_command_end_label(); + + RD::get_singleton()->draw_command_begin_label("Resolve"); + + if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); + } + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderForward::_render_shadow_begin() { + scene_state.shadow_passes.clear(); + RD::get_singleton()->draw_command_begin_label("Shadow Setup"); + _update_render_base_uniform_set(); + + render_list[RENDER_LIST_SECONDARY].clear(); + scene_state.instance_data[RENDER_LIST_SECONDARY].clear(); +} +void RendererSceneRenderForward::_render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end) { + uint32_t shadow_pass_index = scene_state.shadow_passes.size(); + + SceneState::ShadowPass shadow_pass; + + scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1; + + _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), !p_flip_y, Color(), 0, p_zfar, false, p_use_pancake, shadow_pass_index); + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { + p_screen_lod_threshold = 0.0; + } + + PassMode pass_mode = p_use_dp ? PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW; + + uint32_t render_list_from = render_list[RENDER_LIST_SECONDARY].elements.size(); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_projection, p_transform, false, false, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, true); + uint32_t render_list_size = render_list[RENDER_LIST_SECONDARY].elements.size() - render_list_from; + render_list[RENDER_LIST_SECONDARY].sort_by_key_range(render_list_from, render_list_size); + _fill_instance_data(RENDER_LIST_SECONDARY, render_list_from, render_list_size, false); + + { + //regular forward for now + bool flip_cull = p_use_dp_flip; + if (p_flip_y) { + flip_cull = !flip_cull; + } + + shadow_pass.element_from = render_list_from; + shadow_pass.element_count = render_list_size; + shadow_pass.flip_cull = flip_cull; + shadow_pass.pass_mode = pass_mode; + + shadow_pass.rp_uniform_set = RID(); //will be filled later when instance buffer is complete + shadow_pass.camera_plane = p_camera_plane; + shadow_pass.screen_lod_threshold = p_screen_lod_threshold; + shadow_pass.lod_distance_multiplier = p_lod_distance_multiplier; + + shadow_pass.framebuffer = p_framebuffer; + shadow_pass.initial_depth_action = p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION_CONTINUE : RD::INITIAL_ACTION_CONTINUE); + shadow_pass.final_depth_action = p_end ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE; + shadow_pass.rect = p_rect; + + scene_state.shadow_passes.push_back(shadow_pass); + } +} + +void RendererSceneRenderForward::_render_shadow_process() { + _update_instance_data_buffer(RENDER_LIST_SECONDARY); + //render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time) + + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + //render passes need to be configured after instance buffer is done, since they need the latest version + SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i]; + shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>(), false, i); + } + + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderForward::_render_shadow_end(uint32_t p_barrier) { + RD::get_singleton()->draw_command_begin_label("Shadow Render"); + + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i]; + RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, true, shadow_pass.rp_uniform_set, false, Vector2(), shadow_pass.camera_plane, shadow_pass.lod_distance_multiplier, shadow_pass.screen_lod_threshold, shadow_pass.element_from, RD::BARRIER_MASK_NO_BARRIER); + _render_list_with_threads(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, shadow_pass.final_depth_action, Vector<Color>(), 1.0, 0, shadow_pass.rect); + } + + if (p_barrier != RD::BARRIER_MASK_NO_BARRIER) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, p_barrier); + } + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances) { + RENDER_TIMESTAMP("Setup Render Collider Heightfield"); + + RD::get_singleton()->draw_command_begin_label("Render Collider Heightfield"); + + _update_render_base_uniform_set(); + scene_state.ubo.dual_paraboloid_side = 0; + + _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false); + + PassMode pass_mode = PASS_MODE_SHADOW; + + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + + RENDER_TIMESTAMP("Render Collider Heightfield"); + + { + //regular forward for now + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, true, rp_uniform_set); + _render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); + } + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderForward::_render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { + RENDER_TIMESTAMP("Setup Rendering Material"); + + RD::get_singleton()->draw_command_begin_label("Render Material"); + + _update_render_base_uniform_set(); + + scene_state.ubo.dual_paraboloid_side = 0; + scene_state.ubo.material_uv2_mode = false; + + _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); + + PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + + RENDER_TIMESTAMP("Render Material"); + + { + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set); + //regular forward for now + Vector<Color> clear; + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); + RD::get_singleton()->draw_list_end(); + } + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { + RENDER_TIMESTAMP("Setup Rendering UV2"); + + RD::get_singleton()->draw_command_begin_label("Render UV2"); + + _update_render_base_uniform_set(); + + scene_state.ubo.dual_paraboloid_side = 0; + scene_state.ubo.material_uv2_mode = true; + + _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); + + PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + + RENDER_TIMESTAMP("Render Material"); + + { + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, true); + //regular forward for now + Vector<Color> clear; + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + clear.push_back(Color(0, 0, 0, 0)); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); + + const int uv_offset_count = 9; + static const Vector2 uv_offsets[uv_offset_count] = { + Vector2(-1, 1), + Vector2(1, 1), + Vector2(1, -1), + Vector2(-1, -1), + Vector2(-1, 0), + Vector2(1, 0), + Vector2(0, -1), + Vector2(0, 1), + Vector2(0, 0), + + }; + + for (int i = 0; i < uv_offset_count; i++) { + Vector2 ofs = uv_offsets[i]; + ofs.x /= p_region.size.width; + ofs.y /= p_region.size.height; + render_list_params.uv_offset = ofs; + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //first wireframe, for pseudo conservative + } + render_list_params.uv_offset = Vector2(); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //second regular triangles + + RD::get_singleton()->draw_list_end(); + } + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { + RENDER_TIMESTAMP("Render SDFGI"); + + RD::get_singleton()->draw_command_begin_label("Render SDFGI Voxel"); + + _update_render_base_uniform_set(); + + RenderBufferDataForward *render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); + ERR_FAIL_COND(!render_buffer); + + PassMode pass_mode = PASS_MODE_SDF; + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); + + Vector3 half_extents = p_bounds.size * 0.5; + Vector3 center = p_bounds.position + half_extents; + + Vector<RID> sbs; + sbs.push_back(p_albedo_texture); + sbs.push_back(p_emission_texture); + sbs.push_back(p_emission_aniso_texture); + sbs.push_back(p_geom_facing_texture); + + //print_line("re-render " + p_from + " - " + p_size + " bounds " + p_bounds); + for (int i = 0; i < 3; i++) { + scene_state.ubo.sdf_offset[i] = p_from[i]; + scene_state.ubo.sdf_size[i] = p_size[i]; + } + + for (int i = 0; i < 3; i++) { + Vector3 axis; + axis[i] = 1.0; + Vector3 up, right; + int right_axis = (i + 1) % 3; + int up_axis = (i + 2) % 3; + up[up_axis] = 1.0; + right[right_axis] = 1.0; + + Size2i fb_size; + fb_size.x = p_size[right_axis]; + fb_size.y = p_size[up_axis]; + + Transform cam_xform; + cam_xform.origin = center + axis * half_extents; + cam_xform.basis.set_axis(0, right); + cam_xform.basis.set_axis(1, up); + cam_xform.basis.set_axis(2, axis); + + //print_line("pass: " + itos(i) + " xform " + cam_xform); + + float h_size = half_extents[right_axis]; + float v_size = half_extents[up_axis]; + float d_size = half_extents[i] * 2.0; + CameraMatrix camera_proj; + camera_proj.set_orthogonal(-h_size, h_size, -v_size, v_size, 0, d_size); + //print_line("pass: " + itos(i) + " cam hsize: " + rtos(h_size) + " vsize: " + rtos(v_size) + " dsize " + rtos(d_size)); + + Transform to_bounds; + to_bounds.origin = p_bounds.position; + to_bounds.basis.scale(p_bounds.size); + + RendererStorageRD::store_transform(to_bounds.affine_inverse() * cam_xform, scene_state.ubo.sdf_to_bounds); + + _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); + + RID rp_uniform_set = _setup_sdfgi_render_pass_uniform_set(p_albedo_texture, p_emission_texture, p_emission_aniso_texture, p_geom_facing_texture); + + Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size); + if (!E) { + RID fb = RD::get_singleton()->framebuffer_create_empty(fb_size); + E = sdfgi_framebuffer_size_cache.insert(fb_size, fb); + } + + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, false); + _render_list_with_threads(&render_list_params, E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs); + } + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderForward::_base_uniforms_changed() { + if (!render_base_uniform_set.is_null() && RD::get_singleton()->uniform_set_is_valid(render_base_uniform_set)) { + RD::get_singleton()->free(render_base_uniform_set); + } + render_base_uniform_set = RID(); +} + +void RendererSceneRenderForward::_update_render_base_uniform_set() { + if (render_base_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(render_base_uniform_set) || (lightmap_texture_array_version != storage->lightmap_array_get_version())) { + if (render_base_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_base_uniform_set)) { + RD::get_singleton()->free(render_base_uniform_set); + } + + lightmap_texture_array_version = storage->lightmap_array_get_version(); + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.ids.resize(12); + RID *ids_ptr = u.ids.ptrw(); + ids_ptr[0] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[1] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[2] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[3] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[4] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[5] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[6] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[7] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[8] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[9] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[10] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[11] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.ids.push_back(shadow_sampler); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(get_omni_light_buffer()); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(get_spot_light_buffer()); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(get_reflection_probe_buffer()); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 6; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(get_directional_light_buffer()); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 7; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(scene_state.lightmap_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(scene_state.lightmap_capture_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 9; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID decal_atlas = storage->decal_atlas_get_texture(); + u.ids.push_back(decal_atlas); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 10; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID decal_atlas = storage->decal_atlas_get_texture_srgb(); + u.ids.push_back(decal_atlas); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 11; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(get_decal_buffer()); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 12; + u.ids.push_back(storage->global_variables_get_storage_buffer()); + uniforms.push_back(u); + } + + if (!low_end) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 13; + u.ids.push_back(sdfgi_get_ubo()); + uniforms.push_back(u); + } + + render_base_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, SCENE_UNIFORM_SET); + } +} + +RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas, int p_index) { + //there should always be enough uniform buffers for render passes, otherwise bugs + ERR_FAIL_INDEX_V(p_index, (int)scene_state.uniform_buffers.size(), RID()); + + RenderBufferDataForward *rb = nullptr; + if (p_render_buffers.is_valid()) { + rb = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); + } + + //default render buffer and scene state uniform set + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[p_index]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID instance_buffer = scene_state.instance_buffer[p_render_list]; + if (instance_buffer == RID()) { + instance_buffer = default_vec4_xform_buffer; // any buffer will do since its not used + } + u.ids.push_back(instance_buffer); + uniforms.push_back(u); + } + { + RID radiance_texture; + if (p_radiance_texture.is_valid()) { + radiance_texture = p_radiance_texture; + } else { + radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); + } + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(radiance_texture); + uniforms.push_back(u); + } + + { + RID ref_texture = p_reflection_atlas.is_valid() ? reflection_atlas_get_texture(p_reflection_atlas) : RID(); + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + if (ref_texture.is_valid()) { + u.ids.push_back(ref_texture); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK)); + } + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID texture; + if (p_shadow_atlas.is_valid()) { + texture = shadow_atlas_get_texture(p_shadow_atlas); + } + if (!texture.is_valid()) { + texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + } + u.ids.push_back(texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + if (p_use_directional_shadow_atlas && directional_shadow_get_texture().is_valid()) { + u.ids.push_back(directional_shadow_get_texture()); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 6; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.resize(scene_state.max_lightmaps); + RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); + for (uint32_t i = 0; i < scene_state.max_lightmaps; i++) { + if (i < p_lightmaps.size()) { + RID base = lightmap_instance_get_lightmap(p_lightmaps[i]); + RID texture = storage->lightmap_get_texture(base); + RID rd_texture = storage->texture_get_rd_texture(texture); + u.ids.write[i] = rd_texture; + } else { + u.ids.write[i] = default_tex; + } + } + + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 7; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.resize(MAX_GI_PROBES); + RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); + for (int i = 0; i < MAX_GI_PROBES; i++) { + if (i < (int)p_gi_probes.size()) { + RID tex = gi_probe_instance_get_texture(p_gi_probes[i]); + if (!tex.is_valid()) { + tex = default_tex; + } + u.ids.write[i] = tex; + } else { + u.ids.write[i] = default_tex; + } + } + + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID cb = p_cluster_buffer.is_valid() ? p_cluster_buffer : default_vec4_xform_buffer; + u.ids.push_back(cb); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 9; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + u.ids.push_back(texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 10; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID bbt = rb ? render_buffers_get_back_buffer_texture(p_render_buffers) : RID(); + RID texture = bbt.is_valid() ? bbt : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + u.ids.push_back(texture); + uniforms.push_back(u); + } + + if (!low_end) { + { + RD::Uniform u; + u.binding = 11; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID texture = rb && rb->normal_roughness_buffer.is_valid() ? rb->normal_roughness_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_NORMAL); + u.ids.push_back(texture); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 12; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID aot = rb ? render_buffers_get_ao_texture(p_render_buffers) : RID(); + RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + u.ids.push_back(texture); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 13; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID ambient_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_ambient_texture(p_render_buffers) : RID(); + RID texture = ambient_buffer.is_valid() ? ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + u.ids.push_back(texture); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 14; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID reflection_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_reflection_texture(p_render_buffers) : RID(); + RID texture = reflection_buffer.is_valid() ? reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + u.ids.push_back(texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 15; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID t; + if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { + t = render_buffers_get_sdfgi_irradiance_probes(p_render_buffers); + } else { + t = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); + } + u.ids.push_back(t); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 16; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { + u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers)); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 17; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer()); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 18; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID vfog = RID(); + if (rb && render_buffers_has_volumetric_fog(p_render_buffers)) { + vfog = render_buffers_get_volumetric_fog_texture(p_render_buffers); + if (vfog.is_null()) { + vfog = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); + } + } else { + vfog = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); + } + u.ids.push_back(vfog); + uniforms.push_back(u); + } + } + + if (p_index >= (int)render_pass_uniform_sets.size()) { + render_pass_uniform_sets.resize(p_index + 1); + } + + if (render_pass_uniform_sets[p_index].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[p_index])) { + RD::get_singleton()->free(render_pass_uniform_sets[p_index]); + } + + render_pass_uniform_sets[p_index] = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); + return render_pass_uniform_sets[p_index]; +} + +RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture) { + if (sdfgi_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(sdfgi_pass_uniform_set)) { + RD::get_singleton()->free(sdfgi_pass_uniform_set); + } + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(scene_state.instance_buffer[RENDER_LIST_SECONDARY]); + uniforms.push_back(u); + } + { + // No radiance texture. + RID radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(radiance_texture); + uniforms.push_back(u); + } + + { + // No reflection atlas. + RID ref_texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK); + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(ref_texture); + uniforms.push_back(u); + } + + { + // No shadow atlas. + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + u.ids.push_back(texture); + uniforms.push_back(u); + } + + { + // No directional shadow atlas. + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + u.ids.push_back(texture); + uniforms.push_back(u); + } + + { + // No Lightmaps + RD::Uniform u; + u.binding = 6; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.resize(scene_state.max_lightmaps); + RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); + for (uint32_t i = 0; i < scene_state.max_lightmaps; i++) { + u.ids.write[i] = default_tex; + } + + uniforms.push_back(u); + } + + { + // No GIProbes + RD::Uniform u; + u.binding = 7; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.resize(MAX_GI_PROBES); + RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); + for (int i = 0; i < MAX_GI_PROBES; i++) { + u.ids.write[i] = default_tex; + } + + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID cb = default_vec4_xform_buffer; + u.ids.push_back(cb); + uniforms.push_back(u); + } + + // actual sdfgi stuff + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 9; + u.ids.push_back(p_albedo_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 10; + u.ids.push_back(p_emission_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 11; + u.ids.push_back(p_emission_aniso_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 12; + u.ids.push_back(p_geom_facing_texture); + uniforms.push_back(u); + } + + sdfgi_pass_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_sdfgi_rd, RENDER_PASS_UNIFORM_SET); + return sdfgi_pass_uniform_set; +} + +void RendererSceneRenderForward::_render_buffers_clear_uniform_set(RenderBufferDataForward *rb) { +} + +void RendererSceneRenderForward::_render_buffers_uniform_set_changed(RID p_render_buffers) { + RenderBufferDataForward *rb = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); + + _render_buffers_clear_uniform_set(rb); +} + +RID RendererSceneRenderForward::_render_buffers_get_normal_texture(RID p_render_buffers) { + RenderBufferDataForward *rb = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); + + return rb->normal_roughness_buffer; +} + +RendererSceneRenderForward *RendererSceneRenderForward::singleton = nullptr; + +void RendererSceneRenderForward::set_time(double p_time, double p_step) { + time = p_time; + RendererSceneRenderRD::set_time(p_time, p_step); +} + +void RendererSceneRenderForward::_geometry_instance_mark_dirty(GeometryInstance *p_geometry_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + if (ginstance->dirty_list_element.in_list()) { + return; + } + + //clear surface caches + GeometryInstanceSurfaceDataCache *surf = ginstance->surface_caches; + + while (surf) { + GeometryInstanceSurfaceDataCache *next = surf->next; + geometry_instance_surface_alloc.free(surf); + surf = next; + } + + ginstance->surface_caches = nullptr; + + geometry_instance_dirty_list.add(&ginstance->dirty_list_element); +} + +void RendererSceneRenderForward::_geometry_instance_add_surface_with_material(GeometryInstanceForward *ginstance, uint32_t p_surface, MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh) { + bool has_read_screen_alpha = p_material->shader_data->uses_screen_texture || p_material->shader_data->uses_depth_texture || p_material->shader_data->uses_normal_texture; + bool has_base_alpha = (p_material->shader_data->uses_alpha || has_read_screen_alpha); + bool has_blend_alpha = p_material->shader_data->uses_blend_alpha; + bool has_alpha = has_base_alpha || has_blend_alpha; + + uint32_t flags = 0; + + if (p_material->shader_data->uses_sss) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SUBSURFACE_SCATTERING; + } + + if (p_material->shader_data->uses_screen_texture) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SCREEN_TEXTURE; + } + + if (p_material->shader_data->uses_depth_texture) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_DEPTH_TEXTURE; + } + + if (p_material->shader_data->uses_normal_texture) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_NORMAL_TEXTURE; + } + + if (ginstance->data->cast_double_sided_shaodows) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS; + } + + if (has_alpha || has_read_screen_alpha || p_material->shader_data->depth_draw == ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED) { + //material is only meant for alpha pass + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA; + if (p_material->shader_data->uses_depth_pre_pass && !(p_material->shader_data->depth_draw == ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED)) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH; + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW; + } + } else { + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE; + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH; + flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW; + } + + MaterialData *material_shadow = nullptr; + void *surface_shadow = nullptr; + if (!p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_pre_pass) { + flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SHARED_SHADOW_MATERIAL; + material_shadow = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); + + RID shadow_mesh = storage->mesh_get_shadow_mesh(p_mesh); + + if (shadow_mesh.is_valid()) { + surface_shadow = storage->mesh_get_surface(shadow_mesh, p_surface); + } + + } else { + material_shadow = p_material; + } + + GeometryInstanceSurfaceDataCache *sdcache = geometry_instance_surface_alloc.alloc(); + + sdcache->flags = flags; + + sdcache->shader = p_material->shader_data; + sdcache->material_uniform_set = p_material->uniform_set; + sdcache->surface = storage->mesh_get_surface(p_mesh, p_surface); + sdcache->primitive = storage->mesh_surface_get_primitive(sdcache->surface); + sdcache->surface_index = p_surface; + + if (ginstance->data->dirty_dependencies) { + storage->base_update_dependency(p_mesh, &ginstance->data->dependency_tracker); + } + + //shadow + sdcache->shader_shadow = material_shadow->shader_data; + sdcache->material_uniform_set_shadow = material_shadow->uniform_set; + + sdcache->surface_shadow = surface_shadow ? surface_shadow : sdcache->surface; + + sdcache->owner = ginstance; + + sdcache->next = ginstance->surface_caches; + ginstance->surface_caches = sdcache; + + //sortkey + + sdcache->sort.sort_key1 = 0; + sdcache->sort.sort_key2 = 0; + + sdcache->sort.surface_index = p_surface; + sdcache->sort.material_id_low = p_material_id & 0x3FFF; + sdcache->sort.material_id_hi = p_material_id >> 14; + sdcache->sort.shader_id = p_shader_id; + sdcache->sort.geometry_id = p_mesh.get_local_index(); //only meshes can repeat anyway + sdcache->sort.uses_forward_gi = ginstance->can_sdfgi; + sdcache->sort.priority = p_material->priority; +} + +void RendererSceneRenderForward::_geometry_instance_add_surface(GeometryInstanceForward *ginstance, uint32_t p_surface, RID p_material, RID p_mesh) { + RID m_src; + + m_src = ginstance->data->material_override.is_valid() ? ginstance->data->material_override : p_material; + + MaterialData *material = nullptr; + + if (m_src.is_valid()) { + material = (MaterialData *)storage->material_get_data(m_src, RendererStorageRD::SHADER_TYPE_3D); + if (!material || !material->shader_data->valid) { + material = nullptr; + } + } + + if (material) { + if (ginstance->data->dirty_dependencies) { + storage->material_update_dependency(m_src, &ginstance->data->dependency_tracker); + } + } else { + material = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); + m_src = default_material; + } + + ERR_FAIL_COND(!material); + + _geometry_instance_add_surface_with_material(ginstance, p_surface, material, m_src.get_local_index(), storage->material_get_shader_id(m_src), p_mesh); + + while (material->next_pass.is_valid()) { + RID next_pass = material->next_pass; + material = (MaterialData *)storage->material_get_data(next_pass, RendererStorageRD::SHADER_TYPE_3D); + if (!material || !material->shader_data->valid) { + break; + } + if (ginstance->data->dirty_dependencies) { + storage->material_update_dependency(next_pass, &ginstance->data->dependency_tracker); + } + _geometry_instance_add_surface_with_material(ginstance, p_surface, material, next_pass.get_local_index(), storage->material_get_shader_id(next_pass), p_mesh); + } +} + +void RendererSceneRenderForward::_geometry_instance_update(GeometryInstance *p_geometry_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + + if (ginstance->data->dirty_dependencies) { + ginstance->data->dependency_tracker.update_begin(); + } + + //add geometry for drawing + switch (ginstance->data->base_type) { + case RS::INSTANCE_MESH: { + const RID *materials = nullptr; + uint32_t surface_count; + RID mesh = ginstance->data->base; + + materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (materials) { + //if no materials, no surfaces. + const RID *inst_materials = ginstance->data->surface_materials.ptr(); + uint32_t surf_mat_count = ginstance->data->surface_materials.size(); + + for (uint32_t j = 0; j < surface_count; j++) { + RID material = (j < surf_mat_count && inst_materials[j].is_valid()) ? inst_materials[j] : materials[j]; + _geometry_instance_add_surface(ginstance, j, material, mesh); + } + } + + ginstance->instance_count = 1; + + } break; + + case RS::INSTANCE_MULTIMESH: { + RID mesh = storage->multimesh_get_mesh(ginstance->data->base); + if (mesh.is_valid()) { + const RID *materials = nullptr; + uint32_t surface_count; + + materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (materials) { + for (uint32_t j = 0; j < surface_count; j++) { + _geometry_instance_add_surface(ginstance, j, materials[j], mesh); + } + } + + ginstance->instance_count = storage->multimesh_get_instances_to_draw(ginstance->data->base); + } + + } break; +#if 0 + case RS::INSTANCE_IMMEDIATE: { + RasterizerStorageGLES3::Immediate *immediate = storage->immediate_owner.getornull(inst->base); + ERR_CONTINUE(!immediate); + + _add_geometry(immediate, inst, nullptr, -1, p_depth_pass, p_shadow_pass); + + } break; +#endif + case RS::INSTANCE_PARTICLES: { + int draw_passes = storage->particles_get_draw_passes(ginstance->data->base); + + for (int j = 0; j < draw_passes; j++) { + RID mesh = storage->particles_get_draw_pass_mesh(ginstance->data->base, j); + if (!mesh.is_valid()) + continue; + + const RID *materials = nullptr; + uint32_t surface_count; + + materials = storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (materials) { + for (uint32_t k = 0; k < surface_count; k++) { + _geometry_instance_add_surface(ginstance, k, materials[k], mesh); + } + } + } + + ginstance->instance_count = storage->particles_get_amount(ginstance->data->base); + + } break; + + default: { + } + } + + //Fill push constant + + bool store_transform = true; + + if (ginstance->data->base_type == RS::INSTANCE_MULTIMESH) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH; + uint32_t stride; + if (storage->multimesh_get_transform_format(ginstance->data->base) == RS::MULTIMESH_TRANSFORM_2D) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; + stride = 2; + } else { + stride = 3; + } + if (storage->multimesh_uses_colors(ginstance->data->base)) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; + stride += 1; + } + if (storage->multimesh_uses_custom_data(ginstance->data->base)) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; + stride += 1; + } + + ginstance->base_flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); + ginstance->transforms_uniform_set = storage->multimesh_get_3d_uniform_set(ginstance->data->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); + + } else if (ginstance->data->base_type == RS::INSTANCE_PARTICLES) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH; + uint32_t stride; + if (false) { // 2D particles + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; + stride = 2; + } else { + stride = 3; + } + + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; + stride += 1; + + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; + stride += 1; + + ginstance->base_flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); + + if (!storage->particles_is_using_local_coords(ginstance->data->base)) { + store_transform = false; + } + ginstance->transforms_uniform_set = storage->particles_get_instance_buffer_uniform_set(ginstance->data->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); + + } else if (ginstance->data->base_type == RS::INSTANCE_MESH) { + if (storage->skeleton_is_valid(ginstance->data->skeleton)) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_SKELETON; + ginstance->transforms_uniform_set = storage->skeleton_get_3d_uniform_set(ginstance->data->skeleton, default_shader_rd, TRANSFORMS_UNIFORM_SET); + if (ginstance->data->dirty_dependencies) { + storage->skeleton_update_dependency(ginstance->data->skeleton, &ginstance->data->dependency_tracker); + } + } + } + + ginstance->store_transform_cache = store_transform; + ginstance->can_sdfgi = false; + + if (!lightmap_instance_is_valid(ginstance->lightmap_instance) && !low_end) { + if (ginstance->gi_probes[0].is_null() && (ginstance->data->use_baked_light || ginstance->data->use_dynamic_gi)) { + ginstance->can_sdfgi = true; + } + } + + if (ginstance->data->dirty_dependencies) { + ginstance->data->dependency_tracker.update_end(); + ginstance->data->dirty_dependencies = false; + } + + ginstance->dirty_list_element.remove_from_list(); +} + +void RendererSceneRenderForward::_update_dirty_geometry_instances() { + while (geometry_instance_dirty_list.first()) { + _geometry_instance_update(geometry_instance_dirty_list.first()->self()); + } +} + +void RendererSceneRenderForward::_geometry_instance_dependency_changed(RendererStorage::DependencyChangedNotification p_notification, RendererStorage::DependencyTracker *p_tracker) { + switch (p_notification) { + case RendererStorage::DEPENDENCY_CHANGED_MATERIAL: + case RendererStorage::DEPENDENCY_CHANGED_MESH: + case RendererStorage::DEPENDENCY_CHANGED_MULTIMESH: + case RendererStorage::DEPENDENCY_CHANGED_SKELETON_DATA: { + static_cast<RendererSceneRenderForward *>(singleton)->_geometry_instance_mark_dirty(static_cast<GeometryInstance *>(p_tracker->userdata)); + } break; + case RendererStorage::DEPENDENCY_CHANGED_MULTIMESH_VISIBLE_INSTANCES: { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_tracker->userdata); + if (ginstance->data->base_type == RS::INSTANCE_MULTIMESH) { + ginstance->instance_count = static_cast<RendererSceneRenderForward *>(singleton)->storage->multimesh_get_instances_to_draw(ginstance->data->base); + } + } break; + default: { + //rest of notifications of no interest + } break; + } +} +void RendererSceneRenderForward::_geometry_instance_dependency_deleted(const RID &p_dependency, RendererStorage::DependencyTracker *p_tracker) { + static_cast<RendererSceneRenderForward *>(singleton)->_geometry_instance_mark_dirty(static_cast<GeometryInstance *>(p_tracker->userdata)); +} + +RendererSceneRender::GeometryInstance *RendererSceneRenderForward::geometry_instance_create(RID p_base) { + RS::InstanceType type = storage->get_base_type(p_base); + ERR_FAIL_COND_V(!((1 << type) & RS::INSTANCE_GEOMETRY_MASK), nullptr); + + GeometryInstanceForward *ginstance = geometry_instance_alloc.alloc(); + ginstance->data = memnew(GeometryInstanceForward::Data); + + ginstance->data->base = p_base; + ginstance->data->base_type = type; + ginstance->data->dependency_tracker.userdata = ginstance; + ginstance->data->dependency_tracker.changed_callback = _geometry_instance_dependency_changed; + ginstance->data->dependency_tracker.deleted_callback = _geometry_instance_dependency_deleted; + + _geometry_instance_mark_dirty(ginstance); + + return ginstance; +} +void RendererSceneRenderForward::geometry_instance_set_skeleton(GeometryInstance *p_geometry_instance, RID p_skeleton) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->skeleton = p_skeleton; + _geometry_instance_mark_dirty(ginstance); + ginstance->data->dirty_dependencies = true; +} +void RendererSceneRenderForward::geometry_instance_set_material_override(GeometryInstance *p_geometry_instance, RID p_override) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->material_override = p_override; + _geometry_instance_mark_dirty(ginstance); + ginstance->data->dirty_dependencies = true; +} +void RendererSceneRenderForward::geometry_instance_set_surface_materials(GeometryInstance *p_geometry_instance, const Vector<RID> &p_materials) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->surface_materials = p_materials; + _geometry_instance_mark_dirty(ginstance); + ginstance->data->dirty_dependencies = true; +} +void RendererSceneRenderForward::geometry_instance_set_mesh_instance(GeometryInstance *p_geometry_instance, RID p_mesh_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->mesh_instance = p_mesh_instance; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_transform(GeometryInstance *p_geometry_instance, const Transform &p_transform, const AABB &p_aabb, const AABB &p_transformed_aabb) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->transform = p_transform; + ginstance->mirror = p_transform.basis.determinant() < 0; + ginstance->data->aabb = p_aabb; + ginstance->transformed_aabb = p_transformed_aabb; + + Vector3 model_scale_vec = p_transform.basis.get_scale_abs(); + // handle non uniform scale here + + float max_scale = MAX(model_scale_vec.x, MAX(model_scale_vec.y, model_scale_vec.z)); + float min_scale = MIN(model_scale_vec.x, MIN(model_scale_vec.y, model_scale_vec.z)); + ginstance->non_uniform_scale = max_scale >= 0.0 && (min_scale / max_scale) < 0.9; + + ginstance->lod_model_scale = max_scale; +} +void RendererSceneRenderForward::geometry_instance_set_lod_bias(GeometryInstance *p_geometry_instance, float p_lod_bias) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->lod_bias = p_lod_bias; +} +void RendererSceneRenderForward::geometry_instance_set_use_baked_light(GeometryInstance *p_geometry_instance, bool p_enable) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->use_baked_light = p_enable; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_use_dynamic_gi(GeometryInstance *p_geometry_instance, bool p_enable) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->data->use_dynamic_gi = p_enable; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_use_lightmap(GeometryInstance *p_geometry_instance, RID p_lightmap_instance, const Rect2 &p_lightmap_uv_scale, int p_lightmap_slice_index) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->lightmap_instance = p_lightmap_instance; + ginstance->lightmap_uv_scale = p_lightmap_uv_scale; + ginstance->lightmap_slice_index = p_lightmap_slice_index; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_lightmap_capture(GeometryInstance *p_geometry_instance, const Color *p_sh9) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + if (p_sh9) { + if (ginstance->lightmap_sh == nullptr) { + ginstance->lightmap_sh = geometry_instance_lightmap_sh.alloc(); + } + + copymem(ginstance->lightmap_sh->sh, p_sh9, sizeof(Color) * 9); + } else { + if (ginstance->lightmap_sh != nullptr) { + geometry_instance_lightmap_sh.free(ginstance->lightmap_sh); + ginstance->lightmap_sh = nullptr; + } + } + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_instance_shader_parameters_offset(GeometryInstance *p_geometry_instance, int32_t p_offset) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->shader_parameters_offset = p_offset; + _geometry_instance_mark_dirty(ginstance); +} +void RendererSceneRenderForward::geometry_instance_set_cast_double_sided_shadows(GeometryInstance *p_geometry_instance, bool p_enable) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + + ginstance->data->cast_double_sided_shaodows = p_enable; + _geometry_instance_mark_dirty(ginstance); +} + +void RendererSceneRenderForward::geometry_instance_set_layer_mask(GeometryInstance *p_geometry_instance, uint32_t p_layer_mask) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + ginstance->layer_mask = p_layer_mask; +} + +void RendererSceneRenderForward::geometry_instance_free(GeometryInstance *p_geometry_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + if (ginstance->lightmap_sh != nullptr) { + geometry_instance_lightmap_sh.free(ginstance->lightmap_sh); + } + GeometryInstanceSurfaceDataCache *surf = ginstance->surface_caches; + while (surf) { + GeometryInstanceSurfaceDataCache *next = surf->next; + geometry_instance_surface_alloc.free(surf); + surf = next; + } + memdelete(ginstance->data); + geometry_instance_alloc.free(ginstance); +} + +uint32_t RendererSceneRenderForward::geometry_instance_get_pair_mask() { + return (1 << RS::INSTANCE_GI_PROBE); +} +void RendererSceneRenderForward::geometry_instance_pair_light_instances(GeometryInstance *p_geometry_instance, const RID *p_light_instances, uint32_t p_light_instance_count) { +} +void RendererSceneRenderForward::geometry_instance_pair_reflection_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_reflection_probe_instances, uint32_t p_reflection_probe_instance_count) { +} +void RendererSceneRenderForward::geometry_instance_pair_decal_instances(GeometryInstance *p_geometry_instance, const RID *p_decal_instances, uint32_t p_decal_instance_count) { +} + +Transform RendererSceneRenderForward::geometry_instance_get_transform(GeometryInstance *p_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); + ERR_FAIL_COND_V(!ginstance, Transform()); + return ginstance->transform; +} +AABB RendererSceneRenderForward::geometry_instance_get_aabb(GeometryInstance *p_instance) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); + ERR_FAIL_COND_V(!ginstance, AABB()); + return ginstance->data->aabb; +} + +void RendererSceneRenderForward::geometry_instance_pair_gi_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_gi_probe_instances, uint32_t p_gi_probe_instance_count) { + GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); + ERR_FAIL_COND(!ginstance); + if (p_gi_probe_instance_count > 0) { + ginstance->gi_probes[0] = p_gi_probe_instances[0]; + } else { + ginstance->gi_probes[0] = RID(); + } + + if (p_gi_probe_instance_count > 1) { + ginstance->gi_probes[1] = p_gi_probe_instances[1]; + } else { + ginstance->gi_probes[1] = RID(); + } +} + +RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_storage) : + RendererSceneRenderRD(p_storage) { + singleton = this; + low_end = is_low_end(); + storage = p_storage; + + /* SCENE SHADER */ + + { + String defines; + if (low_end) { + defines += "\n#define LOW_END_MODE \n"; + } + + defines += "\n#define MAX_ROUGHNESS_LOD " + itos(get_roughness_layers() - 1) + ".0\n"; + if (is_using_radiance_cubemap_array()) { + defines += "\n#define USE_RADIANCE_CUBEMAP_ARRAY \n"; + } + defines += "\n#define SDFGI_OCT_SIZE " + itos(sdfgi_get_lightprobe_octahedron_size()) + "\n"; + defines += "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(get_max_directional_lights()) + "\n"; + + { + //lightmaps + scene_state.max_lightmaps = low_end ? 2 : MAX_LIGHTMAPS; + defines += "\n#define MAX_LIGHTMAP_TEXTURES " + itos(scene_state.max_lightmaps) + "\n"; + defines += "\n#define MAX_LIGHTMAPS " + itos(scene_state.max_lightmaps) + "\n"; + + scene_state.lightmap_buffer = RD::get_singleton()->storage_buffer_create(sizeof(LightmapData) * scene_state.max_lightmaps); + } + { + //captures + scene_state.max_lightmap_captures = 2048; + scene_state.lightmap_captures = memnew_arr(LightmapCaptureData, scene_state.max_lightmap_captures); + scene_state.lightmap_capture_buffer = RD::get_singleton()->storage_buffer_create(sizeof(LightmapCaptureData) * scene_state.max_lightmap_captures); + } + { + defines += "\n#define MATERIAL_UNIFORM_SET " + itos(MATERIAL_UNIFORM_SET) + "\n"; + } + + Vector<String> shader_versions; + shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n"); + shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n"); + shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n"); + shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_GIPROBE\n"); + shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n"); + shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_SDF\n"); + shader_versions.push_back(""); + shader_versions.push_back("\n#define USE_FORWARD_GI\n"); + shader_versions.push_back("\n#define MODE_MULTIPLE_RENDER_TARGETS\n"); + shader_versions.push_back("\n#define USE_LIGHTMAP\n"); + shader_versions.push_back("\n#define MODE_MULTIPLE_RENDER_TARGETS\n#define USE_LIGHTMAP\n"); + shader.scene_shader.initialize(shader_versions, defines); + + if (is_low_end()) { + //disable the high end versions + shader.scene_shader.set_variant_enabled(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS, false); + shader.scene_shader.set_variant_enabled(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE, false); + shader.scene_shader.set_variant_enabled(SHADER_VERSION_DEPTH_PASS_WITH_SDF, false); + shader.scene_shader.set_variant_enabled(SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI, false); + shader.scene_shader.set_variant_enabled(SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR, false); + shader.scene_shader.set_variant_enabled(SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR, false); + } + } + + storage->shader_set_data_request_function(RendererStorageRD::SHADER_TYPE_3D, _create_shader_funcs); + storage->material_set_data_request_function(RendererStorageRD::SHADER_TYPE_3D, _create_material_funcs); + + { + //shader compiler + ShaderCompilerRD::DefaultIdentifierActions actions; + + actions.renames["WORLD_MATRIX"] = "world_matrix"; + actions.renames["WORLD_NORMAL_MATRIX"] = "world_normal_matrix"; + actions.renames["INV_CAMERA_MATRIX"] = "scene_data.inv_camera_matrix"; + actions.renames["CAMERA_MATRIX"] = "scene_data.camera_matrix"; + actions.renames["PROJECTION_MATRIX"] = "projection_matrix"; + actions.renames["INV_PROJECTION_MATRIX"] = "scene_data.inv_projection_matrix"; + actions.renames["MODELVIEW_MATRIX"] = "modelview"; + actions.renames["MODELVIEW_NORMAL_MATRIX"] = "modelview_normal"; + + actions.renames["VERTEX"] = "vertex"; + actions.renames["NORMAL"] = "normal"; + actions.renames["TANGENT"] = "tangent"; + actions.renames["BINORMAL"] = "binormal"; + actions.renames["POSITION"] = "position"; + actions.renames["UV"] = "uv_interp"; + actions.renames["UV2"] = "uv2_interp"; + actions.renames["COLOR"] = "color_interp"; + actions.renames["POINT_SIZE"] = "gl_PointSize"; + actions.renames["INSTANCE_ID"] = "gl_InstanceIndex"; + + actions.renames["ALPHA_SCISSOR_THRESHOLD"] = "alpha_scissor_threshold"; + actions.renames["ALPHA_HASH_SCALE"] = "alpha_hash_scale"; + actions.renames["ALPHA_ANTIALIASING_EDGE"] = "alpha_antialiasing_edge"; + actions.renames["ALPHA_TEXTURE_COORDINATE"] = "alpha_texture_coordinate"; + + //builtins + + actions.renames["TIME"] = "scene_data.time"; + actions.renames["VIEWPORT_SIZE"] = "scene_data.viewport_size"; + + actions.renames["FRAGCOORD"] = "gl_FragCoord"; + actions.renames["FRONT_FACING"] = "gl_FrontFacing"; + actions.renames["NORMAL_MAP"] = "normal_map"; + actions.renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; + actions.renames["ALBEDO"] = "albedo"; + actions.renames["ALPHA"] = "alpha"; + actions.renames["METALLIC"] = "metallic"; + actions.renames["SPECULAR"] = "specular"; + actions.renames["ROUGHNESS"] = "roughness"; + actions.renames["RIM"] = "rim"; + actions.renames["RIM_TINT"] = "rim_tint"; + actions.renames["CLEARCOAT"] = "clearcoat"; + actions.renames["CLEARCOAT_GLOSS"] = "clearcoat_gloss"; + actions.renames["ANISOTROPY"] = "anisotropy"; + actions.renames["ANISOTROPY_FLOW"] = "anisotropy_flow"; + actions.renames["SSS_STRENGTH"] = "sss_strength"; + actions.renames["SSS_TRANSMITTANCE_COLOR"] = "transmittance_color"; + actions.renames["SSS_TRANSMITTANCE_DEPTH"] = "transmittance_depth"; + actions.renames["SSS_TRANSMITTANCE_CURVE"] = "transmittance_curve"; + actions.renames["SSS_TRANSMITTANCE_BOOST"] = "transmittance_boost"; + actions.renames["BACKLIGHT"] = "backlight"; + actions.renames["AO"] = "ao"; + actions.renames["AO_LIGHT_AFFECT"] = "ao_light_affect"; + actions.renames["EMISSION"] = "emission"; + actions.renames["POINT_COORD"] = "gl_PointCoord"; + actions.renames["INSTANCE_CUSTOM"] = "instance_custom"; + actions.renames["SCREEN_UV"] = "screen_uv"; + actions.renames["SCREEN_TEXTURE"] = "color_buffer"; + actions.renames["DEPTH_TEXTURE"] = "depth_buffer"; + actions.renames["NORMAL_ROUGHNESS_TEXTURE"] = "normal_roughness_buffer"; + actions.renames["DEPTH"] = "gl_FragDepth"; + actions.renames["OUTPUT_IS_SRGB"] = "true"; + actions.renames["FOG"] = "custom_fog"; + actions.renames["RADIANCE"] = "custom_radiance"; + actions.renames["IRRADIANCE"] = "custom_irradiance"; + actions.renames["BONE_INDICES"] = "bone_attrib"; + actions.renames["BONE_WEIGHTS"] = "weight_attrib"; + actions.renames["CUSTOM0"] = "custom0_attrib"; + actions.renames["CUSTOM1"] = "custom1_attrib"; + actions.renames["CUSTOM2"] = "custom2_attrib"; + actions.renames["CUSTOM3"] = "custom3_attrib"; + + //for light + actions.renames["VIEW"] = "view"; + actions.renames["LIGHT_COLOR"] = "light_color"; + actions.renames["LIGHT"] = "light"; + actions.renames["ATTENUATION"] = "attenuation"; + actions.renames["SHADOW_ATTENUATION"] = "shadow_attenuation"; + actions.renames["DIFFUSE_LIGHT"] = "diffuse_light"; + actions.renames["SPECULAR_LIGHT"] = "specular_light"; + + actions.usage_defines["NORMAL"] = "#define NORMAL_USED\n"; + actions.usage_defines["TANGENT"] = "#define TANGENT_USED\n"; + actions.usage_defines["BINORMAL"] = "@TANGENT"; + actions.usage_defines["RIM"] = "#define LIGHT_RIM_USED\n"; + actions.usage_defines["RIM_TINT"] = "@RIM"; + actions.usage_defines["CLEARCOAT"] = "#define LIGHT_CLEARCOAT_USED\n"; + actions.usage_defines["CLEARCOAT_GLOSS"] = "@CLEARCOAT"; + actions.usage_defines["ANISOTROPY"] = "#define LIGHT_ANISOTROPY_USED\n"; + actions.usage_defines["ANISOTROPY_FLOW"] = "@ANISOTROPY"; + actions.usage_defines["AO"] = "#define AO_USED\n"; + actions.usage_defines["AO_LIGHT_AFFECT"] = "#define AO_USED\n"; + actions.usage_defines["UV"] = "#define UV_USED\n"; + actions.usage_defines["UV2"] = "#define UV2_USED\n"; + actions.usage_defines["BONE_INDICES"] = "#define BONES_USED\n"; + actions.usage_defines["BONE_WEIGHTS"] = "#define WEIGHTS_USED\n"; + actions.usage_defines["CUSTOM0"] = "#define CUSTOM0\n"; + actions.usage_defines["CUSTOM1"] = "#define CUSTOM1\n"; + actions.usage_defines["CUSTOM2"] = "#define CUSTOM2\n"; + actions.usage_defines["CUSTOM3"] = "#define CUSTOM3\n"; + actions.usage_defines["NORMAL_MAP"] = "#define NORMAL_MAP_USED\n"; + actions.usage_defines["NORMAL_MAP_DEPTH"] = "@NORMAL_MAP"; + actions.usage_defines["COLOR"] = "#define COLOR_USED\n"; + actions.usage_defines["INSTANCE_CUSTOM"] = "#define ENABLE_INSTANCE_CUSTOM\n"; + actions.usage_defines["POSITION"] = "#define OVERRIDE_POSITION\n"; + + actions.usage_defines["ALPHA_SCISSOR_THRESHOLD"] = "#define ALPHA_SCISSOR_USED\n"; + actions.usage_defines["ALPHA_HASH_SCALE"] = "#define ALPHA_HASH_USED\n"; + actions.usage_defines["ALPHA_ANTIALIASING_EDGE"] = "#define ALPHA_ANTIALIASING_EDGE_USED\n"; + actions.usage_defines["ALPHA_TEXTURE_COORDINATE"] = "@ALPHA_ANTIALIASING_EDGE"; + + actions.usage_defines["SSS_STRENGTH"] = "#define ENABLE_SSS\n"; + actions.usage_defines["SSS_TRANSMITTANCE_DEPTH"] = "#define ENABLE_TRANSMITTANCE\n"; + actions.usage_defines["BACKLIGHT"] = "#define LIGHT_BACKLIGHT_USED\n"; + actions.usage_defines["SCREEN_TEXTURE"] = "#define SCREEN_TEXTURE_USED\n"; + actions.usage_defines["SCREEN_UV"] = "#define SCREEN_UV_USED\n"; + + actions.usage_defines["DIFFUSE_LIGHT"] = "#define USE_LIGHT_SHADER_CODE\n"; + actions.usage_defines["SPECULAR_LIGHT"] = "#define USE_LIGHT_SHADER_CODE\n"; + + actions.usage_defines["FOG"] = "#define CUSTOM_FOG_USED\n"; + actions.usage_defines["RADIANCE"] = "#define CUSTOM_RADIANCE_USED\n"; + actions.usage_defines["IRRADIANCE"] = "#define CUSTOM_IRRADIANCE_USED\n"; + + actions.render_mode_defines["skip_vertex_transform"] = "#define SKIP_TRANSFORM_USED\n"; + actions.render_mode_defines["world_vertex_coords"] = "#define VERTEX_WORLD_COORDS_USED\n"; + actions.render_mode_defines["ensure_correct_normals"] = "#define ENSURE_CORRECT_NORMALS\n"; + actions.render_mode_defines["cull_front"] = "#define DO_SIDE_CHECK\n"; + actions.render_mode_defines["cull_disabled"] = "#define DO_SIDE_CHECK\n"; + + bool force_lambert = GLOBAL_GET("rendering/shading/overrides/force_lambert_over_burley"); + + if (!force_lambert) { + actions.render_mode_defines["diffuse_burley"] = "#define DIFFUSE_BURLEY\n"; + } + + actions.render_mode_defines["diffuse_oren_nayar"] = "#define DIFFUSE_OREN_NAYAR\n"; + actions.render_mode_defines["diffuse_lambert_wrap"] = "#define DIFFUSE_LAMBERT_WRAP\n"; + actions.render_mode_defines["diffuse_toon"] = "#define DIFFUSE_TOON\n"; + + actions.render_mode_defines["sss_mode_skin"] = "#define SSS_MODE_SKIN\n"; + + bool force_blinn = GLOBAL_GET("rendering/shading/overrides/force_blinn_over_ggx"); + + if (!force_blinn) { + actions.render_mode_defines["specular_schlick_ggx"] = "#define SPECULAR_SCHLICK_GGX\n"; + } else { + actions.render_mode_defines["specular_schlick_ggx"] = "#define SPECULAR_BLINN\n"; + } + + actions.render_mode_defines["specular_blinn"] = "#define SPECULAR_BLINN\n"; + actions.render_mode_defines["specular_phong"] = "#define SPECULAR_PHONG\n"; + actions.render_mode_defines["specular_toon"] = "#define SPECULAR_TOON\n"; + actions.render_mode_defines["specular_disabled"] = "#define SPECULAR_DISABLED\n"; + actions.render_mode_defines["shadows_disabled"] = "#define SHADOWS_DISABLED\n"; + actions.render_mode_defines["ambient_light_disabled"] = "#define AMBIENT_LIGHT_DISABLED\n"; + actions.render_mode_defines["shadow_to_opacity"] = "#define USE_SHADOW_TO_OPACITY\n"; + actions.render_mode_defines["unshaded"] = "#define MODE_UNSHADED\n"; + + actions.sampler_array_name = "material_samplers"; + actions.base_texture_binding_index = 1; + actions.texture_layout_set = MATERIAL_UNIFORM_SET; + actions.base_uniform_string = "material."; + actions.base_varying_index = 10; + + actions.default_filter = ShaderLanguage::FILTER_LINEAR_MIPMAP; + actions.default_repeat = ShaderLanguage::REPEAT_ENABLE; + actions.global_buffer_array_variable = "global_variables.data"; + actions.instance_uniform_index_variable = "draw_call.instance_uniforms_ofs"; + + shader.compiler.initialize(actions); + } + + { + //default material and shader + default_shader = storage->shader_allocate(); + storage->shader_initialize(default_shader); + storage->shader_set_code(default_shader, "shader_type spatial; void vertex() { ROUGHNESS = 0.8; } void fragment() { ALBEDO=vec3(0.6); ROUGHNESS=0.8; METALLIC=0.2; } \n"); + default_material = storage->material_allocate(); + storage->material_initialize(default_material); + storage->material_set_shader(default_material, default_shader); + + MaterialData *md = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D); + default_shader_rd = shader.scene_shader.version_get_shader(md->shader_data->version, SHADER_VERSION_COLOR_PASS); + if (!low_end) { + default_shader_sdfgi_rd = shader.scene_shader.version_get_shader(md->shader_data->version, SHADER_VERSION_DEPTH_PASS_WITH_SDF); + } + } + + { + overdraw_material_shader = storage->shader_allocate(); + storage->shader_initialize(overdraw_material_shader); + storage->shader_set_code(overdraw_material_shader, "shader_type spatial;\nrender_mode blend_add,unshaded;\n void fragment() { ALBEDO=vec3(0.4,0.8,0.8); ALPHA=0.2; }"); + overdraw_material = storage->material_allocate(); + storage->material_initialize(overdraw_material); + storage->material_set_shader(overdraw_material, overdraw_material_shader); + + wireframe_material_shader = storage->shader_allocate(); + storage->shader_initialize(wireframe_material_shader); + storage->shader_set_code(wireframe_material_shader, "shader_type spatial;\nrender_mode wireframe,unshaded;\n void fragment() { ALBEDO=vec3(0.0,0.0,0.0); }"); + wireframe_material = storage->material_allocate(); + storage->material_initialize(wireframe_material); + storage->material_set_shader(wireframe_material, wireframe_material_shader); + } + + { + default_vec4_xform_buffer = RD::get_singleton()->storage_buffer_create(256); + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(default_vec4_xform_buffer); + u.binding = 0; + uniforms.push_back(u); + + default_vec4_xform_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, TRANSFORMS_UNIFORM_SET); + } + { + RD::SamplerState sampler; + sampler.mag_filter = RD::SAMPLER_FILTER_LINEAR; + sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler.enable_compare = true; + sampler.compare_op = RD::COMPARE_OP_LESS; + shadow_sampler = RD::get_singleton()->sampler_create(sampler); + } + + render_list_thread_threshold = GLOBAL_GET("rendering/limits/forward_renderer/threaded_render_minimum_instances"); +} + +RendererSceneRenderForward::~RendererSceneRenderForward() { + directional_shadow_atlas_set_size(0); + + //clear base uniform set if still valid + for (uint32_t i = 0; i < render_pass_uniform_sets.size(); i++) { + if (render_pass_uniform_sets[i].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[i])) { + RD::get_singleton()->free(render_pass_uniform_sets[i]); + } + } + + if (sdfgi_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(sdfgi_pass_uniform_set)) { + RD::get_singleton()->free(sdfgi_pass_uniform_set); + } + + RD::get_singleton()->free(default_vec4_xform_buffer); + RD::get_singleton()->free(shadow_sampler); + + storage->free(wireframe_material_shader); + storage->free(overdraw_material_shader); + storage->free(default_shader); + + storage->free(wireframe_material); + storage->free(overdraw_material); + storage->free(default_material); + + { + for (uint32_t i = 0; i < scene_state.uniform_buffers.size(); i++) { + RD::get_singleton()->free(scene_state.uniform_buffers[i]); + } + RD::get_singleton()->free(scene_state.lightmap_buffer); + RD::get_singleton()->free(scene_state.lightmap_capture_buffer); + for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) { + if (scene_state.instance_buffer[i] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[i]); + } + } + memdelete_arr(scene_state.lightmap_captures); + } + + while (sdfgi_framebuffer_size_cache.front()) { + RD::get_singleton()->free(sdfgi_framebuffer_size_cache.front()->get()); + sdfgi_framebuffer_size_cache.erase(sdfgi_framebuffer_size_cache.front()); + } +} diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h new file mode 100644 index 0000000000..af78c50fda --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h @@ -0,0 +1,771 @@ +/*************************************************************************/ +/* renderer_scene_render_forward.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef RENDERING_SERVER_SCENE_RENDER_FORWARD_H +#define RENDERING_SERVER_SCENE_RENDER_FORWARD_H + +#include "core/templates/paged_allocator.h" +#include "servers/rendering/renderer_rd/pipeline_cache_rd.h" +#include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/shaders/scene_forward.glsl.gen.h" + +class RendererSceneRenderForward : public RendererSceneRenderRD { + enum { + SCENE_UNIFORM_SET = 0, + RENDER_PASS_UNIFORM_SET = 1, + TRANSFORMS_UNIFORM_SET = 2, + MATERIAL_UNIFORM_SET = 3 + }; + + enum { + SDFGI_MAX_CASCADES = 8, + MAX_GI_PROBES = 8, + MAX_LIGHTMAPS = 8, + MAX_GI_PROBES_PER_INSTANCE = 2, + INSTANCE_DATA_BUFFER_MIN_SIZE = 4096 + }; + + enum RenderListType { + RENDER_LIST_OPAQUE, //used for opaque objects + RENDER_LIST_ALPHA, //used for transparent objects + RENDER_LIST_SECONDARY, //used for shadows and other objects + RENDER_LIST_MAX + + }; + + /* Scene Shader */ + + enum ShaderVersion { + SHADER_VERSION_DEPTH_PASS, + SHADER_VERSION_DEPTH_PASS_DP, + SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS, + SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE, + SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL, + SHADER_VERSION_DEPTH_PASS_WITH_SDF, + SHADER_VERSION_COLOR_PASS, + SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI, + SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR, + SHADER_VERSION_LIGHTMAP_COLOR_PASS, + SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR, + SHADER_VERSION_MAX + }; + + struct { + SceneForwardShaderRD scene_shader; + ShaderCompilerRD compiler; + } shader; + + RendererStorageRD *storage; + + /* Material */ + + struct ShaderData : public RendererStorageRD::ShaderData { + enum BlendMode { //used internally + BLEND_MODE_MIX, + BLEND_MODE_ADD, + BLEND_MODE_SUB, + BLEND_MODE_MUL, + BLEND_MODE_ALPHA_TO_COVERAGE + }; + + enum DepthDraw { + DEPTH_DRAW_DISABLED, + DEPTH_DRAW_OPAQUE, + DEPTH_DRAW_ALWAYS + }; + + enum DepthTest { + DEPTH_TEST_DISABLED, + DEPTH_TEST_ENABLED + }; + + enum Cull { + CULL_DISABLED, + CULL_FRONT, + CULL_BACK + }; + + enum CullVariant { + CULL_VARIANT_NORMAL, + CULL_VARIANT_REVERSED, + CULL_VARIANT_DOUBLE_SIDED, + CULL_VARIANT_MAX + + }; + + enum AlphaAntiAliasing { + ALPHA_ANTIALIASING_OFF, + ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE, + ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE + }; + + bool valid; + RID version; + uint32_t vertex_input_mask; + PipelineCacheRD pipelines[CULL_VARIANT_MAX][RS::PRIMITIVE_MAX][SHADER_VERSION_MAX]; + + String path; + + Map<StringName, ShaderLanguage::ShaderNode::Uniform> uniforms; + Vector<ShaderCompilerRD::GeneratedCode::Texture> texture_uniforms; + + Vector<uint32_t> ubo_offsets; + uint32_t ubo_size; + + String code; + Map<StringName, RID> default_texture_params; + + DepthDraw depth_draw; + DepthTest depth_test; + + bool uses_point_size; + bool uses_alpha; + bool uses_blend_alpha; + bool uses_alpha_clip; + bool uses_depth_pre_pass; + bool uses_discard; + bool uses_roughness; + bool uses_normal; + + bool unshaded; + bool uses_vertex; + bool uses_sss; + bool uses_transmittance; + bool uses_screen_texture; + bool uses_depth_texture; + bool uses_normal_texture; + bool uses_time; + bool writes_modelview_or_projection; + bool uses_world_coordinates; + + uint64_t last_pass = 0; + uint32_t index = 0; + + virtual void set_code(const String &p_Code); + virtual void set_default_texture_param(const StringName &p_name, RID p_texture); + virtual void get_param_list(List<PropertyInfo> *p_param_list) const; + void get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const; + + virtual bool is_param_texture(const StringName &p_param) const; + virtual bool is_animated() const; + virtual bool casts_shadows() const; + virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + + ShaderData(); + virtual ~ShaderData(); + }; + + RendererStorageRD::ShaderData *_create_shader_func(); + static RendererStorageRD::ShaderData *_create_shader_funcs() { + return static_cast<RendererSceneRenderForward *>(singleton)->_create_shader_func(); + } + + struct MaterialData : public RendererStorageRD::MaterialData { + uint64_t last_frame; + ShaderData *shader_data; + RID uniform_buffer; + RID uniform_set; + Vector<RID> texture_cache; + Vector<uint8_t> ubo_data; + uint64_t last_pass = 0; + uint32_t index = 0; + RID next_pass; + uint8_t priority; + virtual void set_render_priority(int p_priority); + virtual void set_next_pass(RID p_pass); + virtual void update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty); + virtual ~MaterialData(); + }; + + RendererStorageRD::MaterialData *_create_material_func(ShaderData *p_shader); + static RendererStorageRD::MaterialData *_create_material_funcs(RendererStorageRD::ShaderData *p_shader) { + return static_cast<RendererSceneRenderForward *>(singleton)->_create_material_func(static_cast<ShaderData *>(p_shader)); + } + + /* Framebuffer */ + + struct RenderBufferDataForward : public RenderBufferData { + //for rendering, may be MSAAd + + RID color; + RID depth; + RID specular; + RID normal_roughness_buffer; + RID giprobe_buffer; + + RS::ViewportMSAA msaa; + RD::TextureSamples texture_samples; + + RID color_msaa; + RID depth_msaa; + RID specular_msaa; + RID normal_roughness_buffer_msaa; + RID roughness_buffer_msaa; + RID giprobe_buffer_msaa; + + RID depth_fb; + RID depth_normal_roughness_fb; + RID depth_normal_roughness_giprobe_fb; + RID color_fb; + RID color_specular_fb; + RID specular_only_fb; + int width, height; + + RID render_sdfgi_uniform_set; + void ensure_specular(); + void ensure_giprobe(); + void clear(); + virtual void configure(RID p_color_buffer, RID p_depth_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa); + + ~RenderBufferDataForward(); + }; + + virtual RenderBufferData *_create_render_buffer_data(); + void _allocate_normal_roughness_texture(RenderBufferDataForward *rb); + + RID shadow_sampler; + RID render_base_uniform_set; + LocalVector<RID> render_pass_uniform_sets; + RID sdfgi_pass_uniform_set; + + uint64_t lightmap_texture_array_version = 0xFFFFFFFF; + + virtual void _base_uniforms_changed(); + void _render_buffers_clear_uniform_set(RenderBufferDataForward *rb); + virtual void _render_buffers_uniform_set_changed(RID p_render_buffers); + virtual RID _render_buffers_get_normal_texture(RID p_render_buffers); + + void _update_render_base_uniform_set(); + RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture); + RID _setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false, int p_index = 0); + + enum PassMode { + PASS_MODE_COLOR, + PASS_MODE_COLOR_SPECULAR, + PASS_MODE_COLOR_TRANSPARENT, + PASS_MODE_SHADOW, + PASS_MODE_SHADOW_DP, + PASS_MODE_DEPTH, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE, + PASS_MODE_DEPTH_MATERIAL, + PASS_MODE_SDF, + }; + + struct GeometryInstanceSurfaceDataCache; + struct RenderElementInfo; + + struct RenderListParameters { + GeometryInstanceSurfaceDataCache **elements = nullptr; + RenderElementInfo *element_info = nullptr; + int element_count = 0; + bool reverse_cull = false; + PassMode pass_mode = PASS_MODE_COLOR; + bool no_gi = false; + RID render_pass_uniform_set; + bool force_wireframe = false; + Vector2 uv_offset; + Plane lod_plane; + float lod_distance_multiplier = 0.0; + float screen_lod_threshold = 0.0; + RD::FramebufferFormatID framebuffer_format = 0; + uint32_t element_offset = 0; + uint32_t barrier = RD::BARRIER_MASK_ALL; + + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, uint32_t p_element_offset = 0, uint32_t p_barrier = RD::BARRIER_MASK_ALL) { + elements = p_elements; + element_info = p_element_info; + element_count = p_element_count; + reverse_cull = p_reverse_cull; + pass_mode = p_pass_mode; + no_gi = p_no_gi; + render_pass_uniform_set = p_render_pass_uniform_set; + force_wireframe = p_force_wireframe; + uv_offset = p_uv_offset; + lod_plane = p_lod_plane; + lod_distance_multiplier = p_lod_distance_multiplier; + screen_lod_threshold = p_screen_lod_threshold; + element_offset = p_element_offset; + barrier = p_barrier; + } + }; + + struct LightmapData { + float normal_xform[12]; + }; + + struct LightmapCaptureData { + float sh[9 * 4]; + }; + + enum { + INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 6, + INSTANCE_DATA_FLAG_USE_SDFGI = 1 << 7, + INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE = 1 << 8, + INSTANCE_DATA_FLAG_USE_LIGHTMAP = 1 << 9, + INSTANCE_DATA_FLAG_USE_SH_LIGHTMAP = 1 << 10, + INSTANCE_DATA_FLAG_USE_GIPROBE = 1 << 11, + INSTANCE_DATA_FLAG_MULTIMESH = 1 << 12, + INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D = 1 << 13, + INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR = 1 << 14, + INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA = 1 << 15, + INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT = 16, + INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_MASK = 0x7, + INSTANCE_DATA_FLAG_SKELETON = 1 << 19, + }; + + struct SceneState { + struct UBO { + float projection_matrix[16]; + float inv_projection_matrix[16]; + + float camera_matrix[16]; + float inv_camera_matrix[16]; + + float viewport_size[2]; + float screen_pixel_size[2]; + + uint32_t cluster_shift; + uint32_t cluster_width; + uint32_t cluster_type_size; + uint32_t max_cluster_element_count_div_32; + + float directional_penumbra_shadow_kernel[128]; //32 vec4s + float directional_soft_shadow_kernel[128]; + float penumbra_shadow_kernel[128]; + float soft_shadow_kernel[128]; + + uint32_t directional_penumbra_shadow_samples; + uint32_t directional_soft_shadow_samples; + uint32_t penumbra_shadow_samples; + uint32_t soft_shadow_samples; + + float ambient_light_color_energy[4]; + + float ambient_color_sky_mix; + uint32_t use_ambient_light; + uint32_t use_ambient_cubemap; + uint32_t use_reflection_cubemap; + + float radiance_inverse_xform[12]; + + float shadow_atlas_pixel_size[2]; + float directional_shadow_pixel_size[2]; + + uint32_t directional_light_count; + float dual_paraboloid_side; + float z_far; + float z_near; + + uint32_t ssao_enabled; + float ssao_light_affect; + float ssao_ao_affect; + uint32_t roughness_limiter_enabled; + + float roughness_limiter_amount; + float roughness_limiter_limit; + uint32_t roughness_limiter_pad[2]; + + float ao_color[4]; + + float sdf_to_bounds[16]; + + int32_t sdf_offset[3]; + uint32_t material_uv2_mode; + + int32_t sdf_size[3]; + uint32_t gi_upscale_for_msaa; + + uint32_t volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint32_t volumetric_fog_pad; + + // Fog + uint32_t fog_enabled; + float fog_density; + float fog_height; + float fog_height_density; + + float fog_light_color[3]; + float fog_sun_scatter; + + float fog_aerial_perspective; + + float time; + float reflection_multiplier; + + uint32_t pancake_shadows; + }; + + struct PushConstant { + uint32_t base_index; // + uint32_t uv_offset; //packed + uint32_t pad[2]; + }; + + struct InstanceData { + float transform[16]; + uint32_t flags; + uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables + uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint32_t layer_mask; + float lightmap_uv_scale[4]; + }; + + UBO ubo; + + LocalVector<RID> uniform_buffers; + + LightmapData lightmaps[MAX_LIGHTMAPS]; + RID lightmap_ids[MAX_LIGHTMAPS]; + bool lightmap_has_sh[MAX_LIGHTMAPS]; + uint32_t lightmaps_used = 0; + uint32_t max_lightmaps; + RID lightmap_buffer; + + RID instance_buffer[RENDER_LIST_MAX]; + uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 }; + LocalVector<InstanceData> instance_data[RENDER_LIST_MAX]; + + LightmapCaptureData *lightmap_captures; + uint32_t max_lightmap_captures; + RID lightmap_capture_buffer; + + RID giprobe_ids[MAX_GI_PROBES]; + uint32_t giprobes_used = 0; + + bool used_screen_texture = false; + bool used_normal_texture = false; + bool used_depth_texture = false; + bool used_sss = false; + + struct ShadowPass { + uint32_t element_from; + uint32_t element_count; + bool flip_cull; + PassMode pass_mode; + + RID rp_uniform_set; + Plane camera_plane; + float lod_distance_multiplier; + float screen_lod_threshold; + + RID framebuffer; + RD::InitialAction initial_depth_action; + RD::FinalAction final_depth_action; + Rect2i rect; + }; + + LocalVector<ShadowPass> shadow_passes; + + } scene_state; + + static RendererSceneRenderForward *singleton; + + double time; + RID default_shader; + RID default_material; + RID overdraw_material_shader; + RID overdraw_material; + RID wireframe_material_shader; + RID wireframe_material; + RID default_shader_rd; + RID default_shader_sdfgi_rd; + + RID default_vec4_xform_buffer; + RID default_vec4_xform_uniform_set; + + void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0); + void _setup_giprobes(const PagedArray<RID> &p_giprobes); + void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform); + + struct RenderElementInfo { + uint32_t repeat : 22; + uint32_t uses_lightmap : 1; + uint32_t uses_forward_gi : 1; + uint32_t lod_index : 8; + }; + + template <PassMode p_pass_mode> + _FORCE_INLINE_ void _render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); + + void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); + + LocalVector<RD::DrawListID> thread_draw_lists; + void _render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params); + void _render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector<RID> &p_storage_textures = Vector<RID>()); + + uint32_t render_list_thread_threshold = 500; + + void _update_instance_data_buffer(RenderListType p_render_list); + void _fill_instance_data(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true); + void _fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, const Plane &p_lod_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, bool p_append = false); + + Map<Size2i, RID> sdfgi_framebuffer_size_cache; + + struct GeometryInstanceData; + struct GeometryInstanceForward; + + struct GeometryInstanceLightmapSH { + Color sh[9]; + }; + + // Cached data for drawing surfaces + struct GeometryInstanceSurfaceDataCache { + enum { + FLAG_PASS_DEPTH = 1, + FLAG_PASS_OPAQUE = 2, + FLAG_PASS_ALPHA = 4, + FLAG_PASS_SHADOW = 8, + FLAG_USES_SHARED_SHADOW_MATERIAL = 128, + FLAG_USES_SUBSURFACE_SCATTERING = 2048, + FLAG_USES_SCREEN_TEXTURE = 4096, + FLAG_USES_DEPTH_TEXTURE = 8192, + FLAG_USES_NORMAL_TEXTURE = 16384, + FLAG_USES_DOUBLE_SIDED_SHADOWS = 32768, + }; + + union { + struct { + uint64_t lod_index : 8; + uint64_t surface_index : 10; + uint64_t geometry_id : 32; + uint64_t material_id_low : 14; + + uint64_t material_id_hi : 18; + uint64_t shader_id : 32; + uint64_t uses_forward_gi : 1; + uint64_t uses_lightmap : 1; + uint64_t depth_layer : 4; + uint64_t priority : 8; + }; + struct { + uint64_t sort_key1; + uint64_t sort_key2; + }; + } sort; + + RS::PrimitiveType primitive = RS::PRIMITIVE_MAX; + uint32_t flags = 0; + uint32_t surface_index = 0; + + void *surface = nullptr; + RID material_uniform_set; + ShaderData *shader = nullptr; + + void *surface_shadow = nullptr; + RID material_uniform_set_shadow; + ShaderData *shader_shadow = nullptr; + + GeometryInstanceSurfaceDataCache *next = nullptr; + GeometryInstanceForward *owner = nullptr; + }; + + struct GeometryInstanceForward : public GeometryInstance { + //used during rendering + bool mirror = false; + bool non_uniform_scale = false; + float lod_bias = 0.0; + float lod_model_scale = 1.0; + AABB transformed_aabb; //needed for LOD + float depth = 0; + uint32_t gi_offset_cache = 0; + uint32_t flags_cache = 0; + bool store_transform_cache = true; + int32_t shader_parameters_offset = -1; + uint32_t lightmap_slice_index; + Rect2 lightmap_uv_scale; + uint32_t layer_mask = 1; + RID transforms_uniform_set; + uint32_t instance_count = 0; + RID mesh_instance; + bool can_sdfgi = false; + //used during setup + uint32_t base_flags = 0; + Transform transform; + RID gi_probes[MAX_GI_PROBES_PER_INSTANCE]; + RID lightmap_instance; + GeometryInstanceLightmapSH *lightmap_sh = nullptr; + GeometryInstanceSurfaceDataCache *surface_caches = nullptr; + SelfList<GeometryInstanceForward> dirty_list_element; + + struct Data { + //data used less often goes into regular heap + RID base; + RS::InstanceType base_type; + + RID skeleton; + Vector<RID> surface_materials; + RID material_override; + AABB aabb; + + bool use_dynamic_gi = false; + bool use_baked_light = false; + bool cast_double_sided_shaodows = false; + bool mirror = false; + bool dirty_dependencies = false; + + RendererStorage::DependencyTracker dependency_tracker; + }; + + Data *data = nullptr; + + GeometryInstanceForward() : + dirty_list_element(this) {} + }; + + static void _geometry_instance_dependency_changed(RendererStorage::DependencyChangedNotification p_notification, RendererStorage::DependencyTracker *p_tracker); + static void _geometry_instance_dependency_deleted(const RID &p_dependency, RendererStorage::DependencyTracker *p_tracker); + + SelfList<GeometryInstanceForward>::List geometry_instance_dirty_list; + + PagedAllocator<GeometryInstanceForward> geometry_instance_alloc; + PagedAllocator<GeometryInstanceSurfaceDataCache> geometry_instance_surface_alloc; + PagedAllocator<GeometryInstanceLightmapSH> geometry_instance_lightmap_sh; + + void _geometry_instance_add_surface_with_material(GeometryInstanceForward *ginstance, uint32_t p_surface, MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh); + void _geometry_instance_add_surface(GeometryInstanceForward *ginstance, uint32_t p_surface, RID p_material, RID p_mesh); + void _geometry_instance_mark_dirty(GeometryInstance *p_geometry_instance); + void _geometry_instance_update(GeometryInstance *p_geometry_instance); + void _update_dirty_geometry_instances(); + + bool low_end = false; + + /* Render List */ + + struct RenderList { + LocalVector<GeometryInstanceSurfaceDataCache *> elements; + LocalVector<RenderElementInfo> element_info; + + void clear() { + elements.clear(); + element_info.clear(); + } + + //should eventually be replaced by radix + + struct SortByKey { + _FORCE_INLINE_ bool operator()(const GeometryInstanceSurfaceDataCache *A, const GeometryInstanceSurfaceDataCache *B) const { + return (A->sort.sort_key2 == B->sort.sort_key2) ? (A->sort.sort_key1 < B->sort.sort_key1) : (A->sort.sort_key2 < B->sort.sort_key2); + } + }; + + void sort_by_key() { + SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; + sorter.sort(elements.ptr(), elements.size()); + } + + void sort_by_key_range(uint32_t p_from, uint32_t p_size) { + SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; + sorter.sort(elements.ptr() + p_from, p_size); + } + + struct SortByDepth { + _FORCE_INLINE_ bool operator()(const GeometryInstanceSurfaceDataCache *A, const GeometryInstanceSurfaceDataCache *B) const { + return (A->owner->depth < B->owner->depth); + } + }; + + void sort_by_depth() { //used for shadows + + SortArray<GeometryInstanceSurfaceDataCache *, SortByDepth> sorter; + sorter.sort(elements.ptr(), elements.size()); + } + + struct SortByReverseDepthAndPriority { + _FORCE_INLINE_ bool operator()(const GeometryInstanceSurfaceDataCache *A, const GeometryInstanceSurfaceDataCache *B) const { + return (A->sort.priority == B->sort.priority) ? (A->owner->depth > B->owner->depth) : (A->sort.priority < B->sort.priority); + } + }; + + void sort_by_reverse_depth_and_priority(bool p_alpha) { //used for alpha + + SortArray<GeometryInstanceSurfaceDataCache *, SortByReverseDepthAndPriority> sorter; + sorter.sort(elements.ptr(), elements.size()); + } + + _FORCE_INLINE_ void add_element(GeometryInstanceSurfaceDataCache *p_element) { + elements.push_back(p_element); + } + }; + + RenderList render_list[RENDER_LIST_MAX]; + +protected: + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); + + virtual void _render_shadow_begin(); + virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true); + virtual void _render_shadow_process(); + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL); + + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); + virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances); + +public: + virtual GeometryInstance *geometry_instance_create(RID p_base); + virtual void geometry_instance_set_skeleton(GeometryInstance *p_geometry_instance, RID p_skeleton); + virtual void geometry_instance_set_material_override(GeometryInstance *p_geometry_instance, RID p_override); + virtual void geometry_instance_set_surface_materials(GeometryInstance *p_geometry_instance, const Vector<RID> &p_materials); + virtual void geometry_instance_set_mesh_instance(GeometryInstance *p_geometry_instance, RID p_mesh_instance); + virtual void geometry_instance_set_transform(GeometryInstance *p_geometry_instance, const Transform &p_transform, const AABB &p_aabb, const AABB &p_transformed_aabb); + virtual void geometry_instance_set_layer_mask(GeometryInstance *p_geometry_instance, uint32_t p_layer_mask); + virtual void geometry_instance_set_lod_bias(GeometryInstance *p_geometry_instance, float p_lod_bias); + virtual void geometry_instance_set_use_baked_light(GeometryInstance *p_geometry_instance, bool p_enable); + virtual void geometry_instance_set_use_dynamic_gi(GeometryInstance *p_geometry_instance, bool p_enable); + virtual void geometry_instance_set_use_lightmap(GeometryInstance *p_geometry_instance, RID p_lightmap_instance, const Rect2 &p_lightmap_uv_scale, int p_lightmap_slice_index); + virtual void geometry_instance_set_lightmap_capture(GeometryInstance *p_geometry_instance, const Color *p_sh9); + virtual void geometry_instance_set_instance_shader_parameters_offset(GeometryInstance *p_geometry_instance, int32_t p_offset); + virtual void geometry_instance_set_cast_double_sided_shadows(GeometryInstance *p_geometry_instance, bool p_enable); + + virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance); + virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance); + + virtual void geometry_instance_free(GeometryInstance *p_geometry_instance); + + virtual uint32_t geometry_instance_get_pair_mask(); + virtual void geometry_instance_pair_light_instances(GeometryInstance *p_geometry_instance, const RID *p_light_instances, uint32_t p_light_instance_count); + virtual void geometry_instance_pair_reflection_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_reflection_probe_instances, uint32_t p_reflection_probe_instance_count); + virtual void geometry_instance_pair_decal_instances(GeometryInstance *p_geometry_instance, const RID *p_decal_instances, uint32_t p_decal_instance_count); + virtual void geometry_instance_pair_gi_probe_instances(GeometryInstance *p_geometry_instance, const RID *p_gi_probe_instances, uint32_t p_gi_probe_instance_count); + + virtual void set_time(double p_time, double p_step); + + virtual bool free(RID p_rid); + + RendererSceneRenderForward(RendererStorageRD *p_storage); + ~RendererSceneRenderForward(); +}; +#endif // RASTERIZER_SCENE_HIGHEND_RD_H diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp new file mode 100644 index 0000000000..15e963f6e4 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -0,0 +1,8998 @@ +/*************************************************************************/ +/* renderer_scene_render_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "renderer_scene_render_rd.h" + +#include "core/config/project_settings.h" +#include "core/os/os.h" +#include "renderer_compositor_rd.h" +#include "servers/rendering/rendering_server_default.h" + +uint64_t RendererSceneRenderRD::auto_exposure_counter = 2; + +void get_vogel_disk(float *r_kernel, int p_sample_count) { + const float golden_angle = 2.4; + + for (int i = 0; i < p_sample_count; i++) { + float r = Math::sqrt(float(i) + 0.5) / Math::sqrt(float(p_sample_count)); + float theta = float(i) * golden_angle; + + r_kernel[i * 4] = Math::cos(theta) * r; + r_kernel[i * 4 + 1] = Math::sin(theta) * r; + } +} + +void RendererSceneRenderRD::_clear_reflection_data(ReflectionData &rd) { + rd.layers.clear(); + rd.radiance_base_cubemap = RID(); + if (rd.downsampled_radiance_cubemap.is_valid()) { + RD::get_singleton()->free(rd.downsampled_radiance_cubemap); + } + rd.downsampled_radiance_cubemap = RID(); + rd.downsampled_layer.mipmaps.clear(); + rd.coefficient_buffer = RID(); +} + +void RendererSceneRenderRD::_update_reflection_data(ReflectionData &rd, int p_size, int p_mipmaps, bool p_use_array, RID p_base_cube, int p_base_layer, bool p_low_quality) { + //recreate radiance and all data + + int mipmaps = p_mipmaps; + uint32_t w = p_size, h = p_size; + + if (p_use_array) { + int layers = p_low_quality ? 8 : roughness_layers; + + for (int i = 0; i < layers; i++) { + ReflectionData::Layer layer; + uint32_t mmw = w; + uint32_t mmh = h; + layer.mipmaps.resize(mipmaps); + layer.views.resize(mipmaps); + for (int j = 0; j < mipmaps; j++) { + ReflectionData::Layer::Mipmap &mm = layer.mipmaps.write[j]; + mm.size.width = mmw; + mm.size.height = mmh; + for (int k = 0; k < 6; k++) { + mm.views[k] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_base_cube, p_base_layer + i * 6 + k, j); + Vector<RID> fbtex; + fbtex.push_back(mm.views[k]); + mm.framebuffers[k] = RD::get_singleton()->framebuffer_create(fbtex); + } + + layer.views.write[j] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_base_cube, p_base_layer + i * 6, j, RD::TEXTURE_SLICE_CUBEMAP); + + mmw = MAX(1, mmw >> 1); + mmh = MAX(1, mmh >> 1); + } + + rd.layers.push_back(layer); + } + + } else { + mipmaps = p_low_quality ? 8 : mipmaps; + //regular cubemap, lower quality (aliasing, less memory) + ReflectionData::Layer layer; + uint32_t mmw = w; + uint32_t mmh = h; + layer.mipmaps.resize(mipmaps); + layer.views.resize(mipmaps); + for (int j = 0; j < mipmaps; j++) { + ReflectionData::Layer::Mipmap &mm = layer.mipmaps.write[j]; + mm.size.width = mmw; + mm.size.height = mmh; + for (int k = 0; k < 6; k++) { + mm.views[k] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_base_cube, p_base_layer + k, j); + Vector<RID> fbtex; + fbtex.push_back(mm.views[k]); + mm.framebuffers[k] = RD::get_singleton()->framebuffer_create(fbtex); + } + + layer.views.write[j] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_base_cube, p_base_layer, j, RD::TEXTURE_SLICE_CUBEMAP); + + mmw = MAX(1, mmw >> 1); + mmh = MAX(1, mmh >> 1); + } + + rd.layers.push_back(layer); + } + + rd.radiance_base_cubemap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_base_cube, p_base_layer, 0, RD::TEXTURE_SLICE_CUBEMAP); + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = 64; // Always 64x64 + tf.height = 64; + tf.texture_type = RD::TEXTURE_TYPE_CUBE; + tf.array_layers = 6; + tf.mipmaps = 7; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + + rd.downsampled_radiance_cubemap = RD::get_singleton()->texture_create(tf, RD::TextureView()); + { + uint32_t mmw = 64; + uint32_t mmh = 64; + rd.downsampled_layer.mipmaps.resize(7); + for (int j = 0; j < rd.downsampled_layer.mipmaps.size(); j++) { + ReflectionData::DownsampleLayer::Mipmap &mm = rd.downsampled_layer.mipmaps.write[j]; + mm.size.width = mmw; + mm.size.height = mmh; + mm.view = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rd.downsampled_radiance_cubemap, 0, j, RD::TEXTURE_SLICE_CUBEMAP); + + mmw = MAX(1, mmw >> 1); + mmh = MAX(1, mmh >> 1); + } + } +} + +void RendererSceneRenderRD::_create_reflection_fast_filter(ReflectionData &rd, bool p_use_arrays) { + storage->get_effects()->cubemap_downsample(rd.radiance_base_cubemap, rd.downsampled_layer.mipmaps[0].view, rd.downsampled_layer.mipmaps[0].size); + + for (int i = 1; i < rd.downsampled_layer.mipmaps.size(); i++) { + storage->get_effects()->cubemap_downsample(rd.downsampled_layer.mipmaps[i - 1].view, rd.downsampled_layer.mipmaps[i].view, rd.downsampled_layer.mipmaps[i].size); + } + + Vector<RID> views; + if (p_use_arrays) { + for (int i = 1; i < rd.layers.size(); i++) { + views.push_back(rd.layers[i].views[0]); + } + } else { + for (int i = 1; i < rd.layers[0].views.size(); i++) { + views.push_back(rd.layers[0].views[i]); + } + } + + storage->get_effects()->cubemap_filter(rd.downsampled_radiance_cubemap, views, p_use_arrays); +} + +void RendererSceneRenderRD::_create_reflection_importance_sample(ReflectionData &rd, bool p_use_arrays, int p_cube_side, int p_base_layer) { + if (p_use_arrays) { + //render directly to the layers + storage->get_effects()->cubemap_roughness(rd.radiance_base_cubemap, rd.layers[p_base_layer].views[0], p_cube_side, sky_ggx_samples_quality, float(p_base_layer) / (rd.layers.size() - 1.0), rd.layers[p_base_layer].mipmaps[0].size.x); + } else { + storage->get_effects()->cubemap_roughness(rd.layers[0].views[p_base_layer - 1], rd.layers[0].views[p_base_layer], p_cube_side, sky_ggx_samples_quality, float(p_base_layer) / (rd.layers[0].mipmaps.size() - 1.0), rd.layers[0].mipmaps[p_base_layer].size.x); + } +} + +void RendererSceneRenderRD::_update_reflection_mipmaps(ReflectionData &rd, int p_start, int p_end) { + for (int i = p_start; i < p_end; i++) { + for (int j = 0; j < rd.layers[i].views.size() - 1; j++) { + RID view = rd.layers[i].views[j]; + RID texture = rd.layers[i].views[j + 1]; + Size2i size = rd.layers[i].mipmaps[j + 1].size; + storage->get_effects()->cubemap_downsample(view, texture, size); + } + } +} + +void RendererSceneRenderRD::_sdfgi_erase(RenderBuffers *rb) { + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + const SDFGI::Cascade &c = rb->sdfgi->cascades[i]; + RD::get_singleton()->free(c.light_data); + RD::get_singleton()->free(c.light_aniso_0_tex); + RD::get_singleton()->free(c.light_aniso_1_tex); + RD::get_singleton()->free(c.sdf_tex); + RD::get_singleton()->free(c.solid_cell_dispatch_buffer); + RD::get_singleton()->free(c.solid_cell_buffer); + RD::get_singleton()->free(c.lightprobe_history_tex); + RD::get_singleton()->free(c.lightprobe_average_tex); + RD::get_singleton()->free(c.lights_buffer); + } + + RD::get_singleton()->free(rb->sdfgi->render_albedo); + RD::get_singleton()->free(rb->sdfgi->render_emission); + RD::get_singleton()->free(rb->sdfgi->render_emission_aniso); + + RD::get_singleton()->free(rb->sdfgi->render_sdf[0]); + RD::get_singleton()->free(rb->sdfgi->render_sdf[1]); + + RD::get_singleton()->free(rb->sdfgi->render_sdf_half[0]); + RD::get_singleton()->free(rb->sdfgi->render_sdf_half[1]); + + for (int i = 0; i < 8; i++) { + RD::get_singleton()->free(rb->sdfgi->render_occlusion[i]); + } + + RD::get_singleton()->free(rb->sdfgi->render_geom_facing); + + RD::get_singleton()->free(rb->sdfgi->lightprobe_data); + RD::get_singleton()->free(rb->sdfgi->lightprobe_history_scroll); + RD::get_singleton()->free(rb->sdfgi->occlusion_data); + RD::get_singleton()->free(rb->sdfgi->ambient_texture); + + RD::get_singleton()->free(rb->sdfgi->cascades_ubo); + + memdelete(rb->sdfgi); + + rb->sdfgi = nullptr; +} + +const Vector3i RendererSceneRenderRD::SDFGI::Cascade::DIRTY_ALL = Vector3i(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF); + +void RendererSceneRenderRD::sdfgi_update(RID p_render_buffers, RID p_environment, const Vector3 &p_world_position) { + Environment *env = environment_owner.getornull(p_environment); + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + bool needs_sdfgi = env && env->sdfgi_enabled; + + if (!needs_sdfgi) { + if (rb->sdfgi != nullptr) { + //erase it + _sdfgi_erase(rb); + _render_buffers_uniform_set_changed(p_render_buffers); + } + return; + } + + static const uint32_t history_frames_to_converge[RS::ENV_SDFGI_CONVERGE_MAX] = { 5, 10, 15, 20, 25, 30 }; + uint32_t requested_history_size = history_frames_to_converge[sdfgi_frames_to_converge]; + + if (rb->sdfgi && (rb->sdfgi->cascade_mode != env->sdfgi_cascades || rb->sdfgi->min_cell_size != env->sdfgi_min_cell_size || requested_history_size != rb->sdfgi->history_size || rb->sdfgi->uses_occlusion != env->sdfgi_use_occlusion || rb->sdfgi->y_scale_mode != env->sdfgi_y_scale)) { + //configuration changed, erase + _sdfgi_erase(rb); + } + + SDFGI *sdfgi = rb->sdfgi; + if (sdfgi == nullptr) { + //re-create + rb->sdfgi = memnew(SDFGI); + sdfgi = rb->sdfgi; + sdfgi->cascade_mode = env->sdfgi_cascades; + sdfgi->min_cell_size = env->sdfgi_min_cell_size; + sdfgi->uses_occlusion = env->sdfgi_use_occlusion; + sdfgi->y_scale_mode = env->sdfgi_y_scale; + static const float y_scale[3] = { 1.0, 1.5, 2.0 }; + sdfgi->y_mult = y_scale[sdfgi->y_scale_mode]; + static const int cascasde_size[3] = { 4, 6, 8 }; + sdfgi->cascades.resize(cascasde_size[sdfgi->cascade_mode]); + sdfgi->probe_axis_count = SDFGI::PROBE_DIVISOR + 1; + sdfgi->solid_cell_ratio = sdfgi_solid_cell_ratio; + sdfgi->solid_cell_count = uint32_t(float(sdfgi->cascade_size * sdfgi->cascade_size * sdfgi->cascade_size) * sdfgi->solid_cell_ratio); + + float base_cell_size = sdfgi->min_cell_size; + + RD::TextureFormat tf_sdf; + tf_sdf.format = RD::DATA_FORMAT_R8_UNORM; + tf_sdf.width = sdfgi->cascade_size; // Always 64x64 + tf_sdf.height = sdfgi->cascade_size; + tf_sdf.depth = sdfgi->cascade_size; + tf_sdf.texture_type = RD::TEXTURE_TYPE_3D; + tf_sdf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + + { + RD::TextureFormat tf_render = tf_sdf; + tf_render.format = RD::DATA_FORMAT_R16_UINT; + sdfgi->render_albedo = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + tf_render.format = RD::DATA_FORMAT_R32_UINT; + sdfgi->render_emission = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + sdfgi->render_emission_aniso = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + + tf_render.format = RD::DATA_FORMAT_R8_UNORM; //at least its easy to visualize + + for (int i = 0; i < 8; i++) { + sdfgi->render_occlusion[i] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + } + + tf_render.format = RD::DATA_FORMAT_R32_UINT; + sdfgi->render_geom_facing = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + + tf_render.format = RD::DATA_FORMAT_R8G8B8A8_UINT; + sdfgi->render_sdf[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + sdfgi->render_sdf[1] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + + tf_render.width /= 2; + tf_render.height /= 2; + tf_render.depth /= 2; + + sdfgi->render_sdf_half[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + sdfgi->render_sdf_half[1] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + } + + RD::TextureFormat tf_occlusion = tf_sdf; + tf_occlusion.format = RD::DATA_FORMAT_R16_UINT; + tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R16_UINT); + tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16); + tf_occlusion.depth *= sdfgi->cascades.size(); //use depth for occlusion slices + tf_occlusion.width *= 2; //use width for the other half + + RD::TextureFormat tf_light = tf_sdf; + tf_light.format = RD::DATA_FORMAT_R32_UINT; + tf_light.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT); + tf_light.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32); + + RD::TextureFormat tf_aniso0 = tf_sdf; + tf_aniso0.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + RD::TextureFormat tf_aniso1 = tf_sdf; + tf_aniso1.format = RD::DATA_FORMAT_R8G8_UNORM; + + int passes = nearest_shift(sdfgi->cascade_size) - 1; + + //store lightprobe SH + RD::TextureFormat tf_probes; + tf_probes.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf_probes.width = sdfgi->probe_axis_count * sdfgi->probe_axis_count; + tf_probes.height = sdfgi->probe_axis_count * SDFGI::SH_SIZE; + tf_probes.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + tf_probes.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + + sdfgi->history_size = requested_history_size; + + RD::TextureFormat tf_probe_history = tf_probes; + tf_probe_history.format = RD::DATA_FORMAT_R16G16B16A16_SINT; //signed integer because SH are signed + tf_probe_history.array_layers = sdfgi->history_size; + + RD::TextureFormat tf_probe_average = tf_probes; + tf_probe_average.format = RD::DATA_FORMAT_R32G32B32A32_SINT; //signed integer because SH are signed + tf_probe_average.texture_type = RD::TEXTURE_TYPE_2D; + + sdfgi->lightprobe_history_scroll = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView()); + sdfgi->lightprobe_average_scroll = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView()); + + { + //octahedral lightprobes + RD::TextureFormat tf_octprobes = tf_probes; + tf_octprobes.array_layers = sdfgi->cascades.size() * 2; + tf_octprobes.format = RD::DATA_FORMAT_R32_UINT; //pack well with RGBE + tf_octprobes.width = sdfgi->probe_axis_count * sdfgi->probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2); + tf_octprobes.height = sdfgi->probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2); + tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT); + tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32); + //lightprobe texture is an octahedral texture + + sdfgi->lightprobe_data = RD::get_singleton()->texture_create(tf_octprobes, RD::TextureView()); + RD::TextureView tv; + tv.format_override = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; + sdfgi->lightprobe_texture = RD::get_singleton()->texture_create_shared(tv, sdfgi->lightprobe_data); + + //texture handling ambient data, to integrate with volumetric foc + RD::TextureFormat tf_ambient = tf_probes; + tf_ambient.array_layers = sdfgi->cascades.size(); + tf_ambient.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; //pack well with RGBE + tf_ambient.width = sdfgi->probe_axis_count * sdfgi->probe_axis_count; + tf_ambient.height = sdfgi->probe_axis_count; + tf_ambient.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + //lightprobe texture is an octahedral texture + sdfgi->ambient_texture = RD::get_singleton()->texture_create(tf_ambient, RD::TextureView()); + } + + sdfgi->cascades_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES); + + sdfgi->occlusion_data = RD::get_singleton()->texture_create(tf_occlusion, RD::TextureView()); + { + RD::TextureView tv; + tv.format_override = RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16; + sdfgi->occlusion_texture = RD::get_singleton()->texture_create_shared(tv, sdfgi->occlusion_data); + } + + for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = sdfgi->cascades[i]; + + /* 3D Textures */ + + cascade.sdf_tex = RD::get_singleton()->texture_create(tf_sdf, RD::TextureView()); + + cascade.light_data = RD::get_singleton()->texture_create(tf_light, RD::TextureView()); + + cascade.light_aniso_0_tex = RD::get_singleton()->texture_create(tf_aniso0, RD::TextureView()); + cascade.light_aniso_1_tex = RD::get_singleton()->texture_create(tf_aniso1, RD::TextureView()); + + { + RD::TextureView tv; + tv.format_override = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; + cascade.light_tex = RD::get_singleton()->texture_create_shared(tv, cascade.light_data); + + RD::get_singleton()->texture_clear(cascade.light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(cascade.light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(cascade.light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + } + + cascade.cell_size = base_cell_size; + Vector3 world_position = p_world_position; + world_position.y *= sdfgi->y_mult; + int32_t probe_cells = sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + Vector3 probe_size = Vector3(1, 1, 1) * cascade.cell_size * probe_cells; + Vector3i probe_pos = Vector3i((world_position / probe_size + Vector3(0.5, 0.5, 0.5)).floor()); + cascade.position = probe_pos * probe_cells; + + cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; + + base_cell_size *= 2.0; + + /* Probe History */ + + cascade.lightprobe_history_tex = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView()); + RD::get_singleton()->texture_clear(cascade.lightprobe_history_tex, Color(0, 0, 0, 0), 0, 1, 0, tf_probe_history.array_layers); //needs to be cleared for average to work + + cascade.lightprobe_average_tex = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView()); + RD::get_singleton()->texture_clear(cascade.lightprobe_average_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); //needs to be cleared for average to work + + /* Buffers */ + + cascade.solid_cell_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGI::Cascade::SolidCell) * sdfgi->solid_cell_count); + cascade.solid_cell_dispatch_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4, Vector<uint8_t>(), RD::STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); + cascade.lights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDGIShader::Light) * MAX(SDFGI::MAX_STATIC_LIGHTS, SDFGI::MAX_DYNAMIC_LIGHTS)); + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_sdf[(passes & 1) ? 1 : 0]); //if passes are even, we read from buffer 0, else we read from buffer 1 + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->render_albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + for (int j = 0; j < 8; j++) { + u.ids.push_back(sdfgi->render_occlusion[j]); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 4; + u.ids.push_back(sdfgi->render_emission); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 5; + u.ids.push_back(sdfgi->render_emission_aniso); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 6; + u.ids.push_back(sdfgi->render_geom_facing); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 7; + u.ids.push_back(cascade.sdf_tex); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 8; + u.ids.push_back(sdfgi->occlusion_data); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 10; + u.ids.push_back(cascade.solid_cell_dispatch_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 11; + u.ids.push_back(cascade.solid_cell_buffer); + uniforms.push_back(u); + } + + cascade.sdf_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_STORE), 0); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->render_geom_facing); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + u.ids.push_back(sdfgi->render_emission); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 4; + u.ids.push_back(sdfgi->render_emission_aniso); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 5; + u.ids.push_back(cascade.solid_cell_dispatch_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 6; + u.ids.push_back(cascade.solid_cell_buffer); + uniforms.push_back(u); + } + + cascade.scroll_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_SCROLL), 0); + } + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + for (int j = 0; j < 8; j++) { + u.ids.push_back(sdfgi->render_occlusion[j]); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->occlusion_data); + uniforms.push_back(u); + } + + cascade.scroll_occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_SCROLL_OCCLUSION), 0); + } + } + + //direct light + for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = sdfgi->cascades[i]; + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (j < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[j].sdf_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(cascade.solid_cell_dispatch_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(cascade.solid_cell_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.ids.push_back(cascade.light_data); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 6; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.ids.push_back(cascade.light_aniso_0_tex); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 7; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.ids.push_back(cascade.light_aniso_1_tex); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(rb->sdfgi->cascades_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 9; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(cascade.lights_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 10; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(rb->sdfgi->lightprobe_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 11; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(rb->sdfgi->occlusion_texture); + uniforms.push_back(u); + } + + cascade.sdf_direct_light_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.direct_light.version_get_shader(sdfgi_shader.direct_light_shader, 0), 0); + } + + //preprocess initialize uniform set + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->render_sdf[0]); + uniforms.push_back(u); + } + + sdfgi->sdf_initialize_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE), 0); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->render_sdf_half[0]); + uniforms.push_back(u); + } + + sdfgi->sdf_initialize_half_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF), 0); + } + + //jump flood uniform set + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_sdf[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->render_sdf[1]); + uniforms.push_back(u); + } + + sdfgi->jump_flood_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); + SWAP(uniforms.write[0].ids.write[0], uniforms.write[1].ids.write[0]); + sdfgi->jump_flood_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); + } + //jump flood half uniform set + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_sdf_half[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->render_sdf_half[1]); + uniforms.push_back(u); + } + + sdfgi->jump_flood_half_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); + SWAP(uniforms.write[0].ids.write[0], uniforms.write[1].ids.write[0]); + sdfgi->jump_flood_half_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); + } + + //upscale half size sdf + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(sdfgi->render_sdf_half[(passes & 1) ? 0 : 1]); //reverse pass order because half size + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + u.ids.push_back(sdfgi->render_sdf[(passes & 1) ? 0 : 1]); //reverse pass order because it needs an extra JFA pass + uniforms.push_back(u); + } + + sdfgi->upscale_jfa_uniform_set_index = (passes & 1) ? 0 : 1; + sdfgi->sdf_upscale_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE), 0); + } + + //occlusion uniform set + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(sdfgi->render_albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + for (int i = 0; i < 8; i++) { + u.ids.push_back(sdfgi->render_occlusion[i]); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + u.ids.push_back(sdfgi->render_geom_facing); + uniforms.push_back(u); + } + + sdfgi->occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_OCCLUSION), 0); + } + + for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { + //integrate uniform + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (j < sdfgi->cascades.size()) { + u.ids.push_back(sdfgi->cascades[j].sdf_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (j < sdfgi->cascades.size()) { + u.ids.push_back(sdfgi->cascades[j].light_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (j < sdfgi->cascades.size()) { + u.ids.push_back(sdfgi->cascades[j].light_aniso_0_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (j < sdfgi->cascades.size()) { + u.ids.push_back(sdfgi->cascades[j].light_aniso_1_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 6; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 7; + u.ids.push_back(sdfgi->cascades_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 8; + u.ids.push_back(sdfgi->lightprobe_data); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 9; + u.ids.push_back(sdfgi->cascades[i].lightprobe_history_tex); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 10; + u.ids.push_back(sdfgi->cascades[i].lightprobe_average_tex); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 11; + u.ids.push_back(sdfgi->lightprobe_history_scroll); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 12; + u.ids.push_back(sdfgi->lightprobe_average_scroll); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 13; + RID parent_average; + if (i < sdfgi->cascades.size() - 1) { + parent_average = sdfgi->cascades[i + 1].lightprobe_average_tex; + } else { + parent_average = sdfgi->cascades[i - 1].lightprobe_average_tex; //to use something, but it won't be used + } + u.ids.push_back(parent_average); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 14; + u.ids.push_back(sdfgi->ambient_texture); + uniforms.push_back(u); + } + + sdfgi->cascades[i].integrate_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 0); + } + + sdfgi->bounce_feedback = env->sdfgi_bounce_feedback; + sdfgi->energy = env->sdfgi_energy; + sdfgi->normal_bias = env->sdfgi_normal_bias; + sdfgi->probe_bias = env->sdfgi_probe_bias; + sdfgi->reads_sky = env->sdfgi_read_sky_light; + + _render_buffers_uniform_set_changed(p_render_buffers); + + return; //done. all levels will need to be rendered which its going to take a bit + } + + //check for updates + + sdfgi->bounce_feedback = env->sdfgi_bounce_feedback; + sdfgi->energy = env->sdfgi_energy; + sdfgi->normal_bias = env->sdfgi_normal_bias; + sdfgi->probe_bias = env->sdfgi_probe_bias; + sdfgi->reads_sky = env->sdfgi_read_sky_light; + + int32_t drag_margin = (sdfgi->cascade_size / SDFGI::PROBE_DIVISOR) / 2; + + for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = sdfgi->cascades[i]; + cascade.dirty_regions = Vector3i(); + + Vector3 probe_half_size = Vector3(1, 1, 1) * cascade.cell_size * float(sdfgi->cascade_size / SDFGI::PROBE_DIVISOR) * 0.5; + probe_half_size = Vector3(0, 0, 0); + + Vector3 world_position = p_world_position; + world_position.y *= sdfgi->y_mult; + Vector3i pos_in_cascade = Vector3i((world_position + probe_half_size) / cascade.cell_size); + + for (int j = 0; j < 3; j++) { + if (pos_in_cascade[j] < cascade.position[j]) { + while (pos_in_cascade[j] < (cascade.position[j] - drag_margin)) { + cascade.position[j] -= drag_margin * 2; + cascade.dirty_regions[j] += drag_margin * 2; + } + } else if (pos_in_cascade[j] > cascade.position[j]) { + while (pos_in_cascade[j] > (cascade.position[j] + drag_margin)) { + cascade.position[j] += drag_margin * 2; + cascade.dirty_regions[j] -= drag_margin * 2; + } + } + + if (cascade.dirty_regions[j] == 0) { + continue; // not dirty + } else if (uint32_t(ABS(cascade.dirty_regions[j])) >= sdfgi->cascade_size) { + //moved too much, just redraw everything (make all dirty) + cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; + break; + } + } + + if (cascade.dirty_regions != Vector3i() && cascade.dirty_regions != SDFGI::Cascade::DIRTY_ALL) { + //see how much the total dirty volume represents from the total volume + uint32_t total_volume = sdfgi->cascade_size * sdfgi->cascade_size * sdfgi->cascade_size; + uint32_t safe_volume = 1; + for (int j = 0; j < 3; j++) { + safe_volume *= sdfgi->cascade_size - ABS(cascade.dirty_regions[j]); + } + uint32_t dirty_volume = total_volume - safe_volume; + if (dirty_volume > (safe_volume / 2)) { + //more than half the volume is dirty, make all dirty so its only rendered once + cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; + } + } + } +} + +int RendererSceneRenderRD::sdfgi_get_pending_region_count(RID p_render_buffers) const { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + + ERR_FAIL_COND_V(rb == nullptr, 0); + + if (rb->sdfgi == nullptr) { + return 0; + } + + int dirty_count = 0; + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + const SDFGI::Cascade &c = rb->sdfgi->cascades[i]; + + if (c.dirty_regions == SDFGI::Cascade::DIRTY_ALL) { + dirty_count++; + } else { + for (int j = 0; j < 3; j++) { + if (c.dirty_regions[j] != 0) { + dirty_count++; + } + } + } + } + + return dirty_count; +} + +int RendererSceneRenderRD::_sdfgi_get_pending_region_data(RID p_render_buffers, int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(rb == nullptr, -1); + ERR_FAIL_COND_V(rb->sdfgi == nullptr, -1); + + int dirty_count = 0; + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + const SDFGI::Cascade &c = rb->sdfgi->cascades[i]; + + if (c.dirty_regions == SDFGI::Cascade::DIRTY_ALL) { + if (dirty_count == p_region) { + r_local_offset = Vector3i(); + r_local_size = Vector3i(1, 1, 1) * rb->sdfgi->cascade_size; + + r_bounds.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + c.position)) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); + r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); + return i; + } + dirty_count++; + } else { + for (int j = 0; j < 3; j++) { + if (c.dirty_regions[j] != 0) { + if (dirty_count == p_region) { + Vector3i from = Vector3i(0, 0, 0); + Vector3i to = Vector3i(1, 1, 1) * rb->sdfgi->cascade_size; + + if (c.dirty_regions[j] > 0) { + //fill from the beginning + to[j] = c.dirty_regions[j]; + } else { + //fill from the end + from[j] = to[j] + c.dirty_regions[j]; + } + + for (int k = 0; k < j; k++) { + // "chip" away previous regions to avoid re-voxelizing the same thing + if (c.dirty_regions[k] > 0) { + from[k] += c.dirty_regions[k]; + } else if (c.dirty_regions[k] < 0) { + to[k] += c.dirty_regions[k]; + } + } + + r_local_offset = from; + r_local_size = to - from; + + r_bounds.position = Vector3(from + Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + c.position) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); + r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); + + return i; + } + + dirty_count++; + } + } + } + } + return -1; +} + +AABB RendererSceneRenderRD::sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const { + AABB bounds; + Vector3i from; + Vector3i size; + + int c = _sdfgi_get_pending_region_data(p_render_buffers, p_region, from, size, bounds); + ERR_FAIL_COND_V(c == -1, AABB()); + return bounds; +} + +uint32_t RendererSceneRenderRD::sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const { + AABB bounds; + Vector3i from; + Vector3i size; + + return _sdfgi_get_pending_region_data(p_render_buffers, p_region, from, size, bounds); +} + +void RendererSceneRenderRD::_sdfgi_update_cascades(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; + } + + //update cascades + SDFGI::Cascade::UBO cascade_data[SDFGI::MAX_CASCADES]; + int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + Vector3 pos = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[i].position)) * rb->sdfgi->cascades[i].cell_size; + + cascade_data[i].offset[0] = pos.x; + cascade_data[i].offset[1] = pos.y; + cascade_data[i].offset[2] = pos.z; + cascade_data[i].to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; + cascade_data[i].probe_offset[0] = rb->sdfgi->cascades[i].position.x / probe_divisor; + cascade_data[i].probe_offset[1] = rb->sdfgi->cascades[i].position.y / probe_divisor; + cascade_data[i].probe_offset[2] = rb->sdfgi->cascades[i].position.z / probe_divisor; + cascade_data[i].pad = 0; + } + + RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, RD::BARRIER_MASK_COMPUTE); +} + +void RendererSceneRenderRD::_sdfgi_update_light(RID p_render_buffers, RID p_environment) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; + } + + RD::get_singleton()->draw_command_begin_label("SDFGI Update dynamic Light"); + + /* Update dynamic light */ + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); + + SDGIShader::DirectLightPushConstant push_constant; + + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.bounce_feedback = rb->sdfgi->bounce_feedback; + push_constant.y_mult = rb->sdfgi->y_mult; + push_constant.use_occlusion = rb->sdfgi->uses_occlusion; + + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + push_constant.light_count = rb->sdfgi->cascade_dynamic_light_count[i]; + push_constant.cascade = i; + + if (rb->sdfgi->cascades[i].all_dynamic_lights_dirty || sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) { + push_constant.process_offset = 0; + push_constant.process_increment = 1; + } else { + static uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = { + 1, 2, 4, 8, 16 + }; + + uint32_t frames_to_update = frames_to_update_table[sdfgi_frames_to_update_light]; + + push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update; + push_constant.process_increment = frames_to_update; + } + rb->sdfgi->cascades[i].all_dynamic_lights_dirty = false; + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DirectLightPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); + } + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderRD::_sdfgi_update_probes(RID p_render_buffers, RID p_environment) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; + } + + RD::get_singleton()->draw_command_begin_label("SDFGI Update Probes"); + + Environment *env = environment_owner.getornull(p_environment); + + SDGIShader::IntegratePushConstant push_constant; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.history_index = rb->sdfgi->render_pass % rb->sdfgi->history_size; + push_constant.history_size = rb->sdfgi->history_size; + static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; + push_constant.ray_count = ray_count[sdfgi_ray_count]; + push_constant.ray_bias = rb->sdfgi->probe_bias; + push_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + push_constant.image_size[1] = rb->sdfgi->probe_axis_count; + push_constant.store_ambient_texture = env->volumetric_fog_enabled; + + RID sky_uniform_set = sdfgi_shader.integrate_default_sky_uniform_set; + push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_DISABLED; + push_constant.y_mult = rb->sdfgi->y_mult; + + if (rb->sdfgi->reads_sky && env) { + push_constant.sky_energy = env->bg_energy; + + if (env->background == RS::ENV_BG_CLEAR_COLOR) { + push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_COLOR; + Color c = storage->get_default_clear_color().to_linear(); + push_constant.sky_color[0] = c.r; + push_constant.sky_color[1] = c.g; + push_constant.sky_color[2] = c.b; + } else if (env->background == RS::ENV_BG_COLOR) { + push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_COLOR; + Color c = env->bg_color; + push_constant.sky_color[0] = c.r; + push_constant.sky_color[1] = c.g; + push_constant.sky_color[2] = c.b; + + } else if (env->background == RS::ENV_BG_SKY) { + Sky *sky = sky_owner.getornull(env->sky); + if (sky && sky->radiance.is_valid()) { + if (sky->sdfgi_integrate_sky_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(sky->sdfgi_integrate_sky_uniform_set)) { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + u.ids.push_back(sky->radiance); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + sky->sdfgi_integrate_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); + } + sky_uniform_set = sky->sdfgi_integrate_sky_uniform_set; + push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_SKY; + } + } + } + + rb->sdfgi->render_pass++; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_PROCESS]); + + int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + push_constant.cascade = i; + push_constant.world_offset[0] = rb->sdfgi->cascades[i].position.x / probe_divisor; + push_constant.world_offset[1] = rb->sdfgi->cascades[i].position.y / probe_divisor; + push_constant.world_offset[2] = rb->sdfgi->cascades[i].position.z / probe_divisor; + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[i].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sky_uniform_set, 1); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); + } + + //end later after raster to avoid barriering on layout changes + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderRD::_sdfgi_store_probes(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; + } + + RD::get_singleton()->barrier(RD::BARRIER_MASK_COMPUTE, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_begin_label("SDFGI Store Probes"); + + SDGIShader::IntegratePushConstant push_constant; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.history_index = rb->sdfgi->render_pass % rb->sdfgi->history_size; + push_constant.history_size = rb->sdfgi->history_size; + static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; + push_constant.ray_count = ray_count[sdfgi_ray_count]; + push_constant.ray_bias = rb->sdfgi->probe_bias; + push_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + push_constant.image_size[1] = rb->sdfgi->probe_axis_count; + push_constant.store_ambient_texture = false; + + push_constant.sky_mode = 0; + push_constant.y_mult = rb->sdfgi->y_mult; + + // Then store values into the lightprobe texture. Separating these steps has a small performance hit, but it allows for multiple bounces + RENDER_TIMESTAMP("Average Probes"); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_STORE]); + + //convert to octahedral to store + push_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; + push_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; + + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + push_constant.cascade = i; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[i].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); + } + + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used) { + r_gi_probes_used = 0; + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + + RD::get_singleton()->draw_command_begin_label("GIProbes Setup"); + + RID gi_probe_buffer = render_buffers_get_gi_probe_buffer(p_render_buffers); + GI::GIProbeData gi_probe_data[RenderBuffers::MAX_GIPROBES]; + + bool giprobes_changed = false; + + Transform to_camera; + to_camera.origin = p_transform.origin; //only translation, make local + + for (int i = 0; i < RenderBuffers::MAX_GIPROBES; i++) { + RID texture; + if (i < (int)p_gi_probes.size()) { + GIProbeInstance *gipi = gi_probe_instance_owner.getornull(p_gi_probes[i]); + + if (gipi) { + texture = gipi->texture; + GI::GIProbeData &gipd = gi_probe_data[i]; + + RID base_probe = gipi->probe; + + Transform to_cell = storage->gi_probe_get_to_cell_xform(gipi->probe) * gipi->transform.affine_inverse() * to_camera; + + gipd.xform[0] = to_cell.basis.elements[0][0]; + gipd.xform[1] = to_cell.basis.elements[1][0]; + gipd.xform[2] = to_cell.basis.elements[2][0]; + gipd.xform[3] = 0; + gipd.xform[4] = to_cell.basis.elements[0][1]; + gipd.xform[5] = to_cell.basis.elements[1][1]; + gipd.xform[6] = to_cell.basis.elements[2][1]; + gipd.xform[7] = 0; + gipd.xform[8] = to_cell.basis.elements[0][2]; + gipd.xform[9] = to_cell.basis.elements[1][2]; + gipd.xform[10] = to_cell.basis.elements[2][2]; + gipd.xform[11] = 0; + gipd.xform[12] = to_cell.origin.x; + gipd.xform[13] = to_cell.origin.y; + gipd.xform[14] = to_cell.origin.z; + gipd.xform[15] = 1; + + Vector3 bounds = storage->gi_probe_get_octree_size(base_probe); + + gipd.bounds[0] = bounds.x; + gipd.bounds[1] = bounds.y; + gipd.bounds[2] = bounds.z; + + gipd.dynamic_range = storage->gi_probe_get_dynamic_range(base_probe) * storage->gi_probe_get_energy(base_probe); + gipd.bias = storage->gi_probe_get_bias(base_probe); + gipd.normal_bias = storage->gi_probe_get_normal_bias(base_probe); + gipd.blend_ambient = !storage->gi_probe_is_interior(base_probe); + gipd.anisotropy_strength = 0; + gipd.ao = storage->gi_probe_get_ao(base_probe); + gipd.ao_size = Math::pow(storage->gi_probe_get_ao_size(base_probe), 4.0f); + gipd.mipmaps = gipi->mipmaps.size(); + } + + r_gi_probes_used++; + } + + if (texture == RID()) { + texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); + } + + if (texture != rb->giprobe_textures[i]) { + giprobes_changed = true; + rb->giprobe_textures[i] = texture; + } + } + + if (giprobes_changed) { + if (RD::get_singleton()->uniform_set_is_valid(rb->gi_uniform_set)) { + RD::get_singleton()->free(rb->gi_uniform_set); + } + rb->gi_uniform_set = RID(); + if (rb->volumetric_fog) { + if (RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { + RD::get_singleton()->free(rb->volumetric_fog->uniform_set); + RD::get_singleton()->free(rb->volumetric_fog->uniform_set2); + } + rb->volumetric_fog->uniform_set = RID(); + rb->volumetric_fog->uniform_set2 = RID(); + } + } + + if (p_gi_probes.size() > 0) { + RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data, RD::BARRIER_MASK_COMPUTE); + } + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderRD::_pre_process_gi(RID p_render_buffers, const Transform &p_transform) { + // Do the required buffer transfers and setup before the depth-pre pass, this way GI can + // run in parallel during depth-pre pass and shadow rendering. + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + + /* Update Cascades UBO */ + + if (rb->sdfgi) { + /* Update general SDFGI Buffer */ + + _sdfgi_update_cascades(p_render_buffers); + + GI::SDFGIData sdfgi_data; + + sdfgi_data.grid_size[0] = rb->sdfgi->cascade_size; + sdfgi_data.grid_size[1] = rb->sdfgi->cascade_size; + sdfgi_data.grid_size[2] = rb->sdfgi->cascade_size; + + sdfgi_data.max_cascades = rb->sdfgi->cascades.size(); + sdfgi_data.probe_axis_size = rb->sdfgi->probe_axis_count; + sdfgi_data.cascade_probe_size[0] = sdfgi_data.probe_axis_size - 1; //float version for performance + sdfgi_data.cascade_probe_size[1] = sdfgi_data.probe_axis_size - 1; + sdfgi_data.cascade_probe_size[2] = sdfgi_data.probe_axis_size - 1; + + float csize = rb->sdfgi->cascade_size; + sdfgi_data.probe_to_uvw = 1.0 / float(sdfgi_data.cascade_probe_size[0]); + sdfgi_data.use_occlusion = rb->sdfgi->uses_occlusion; + //sdfgi_data.energy = rb->sdfgi->energy; + + sdfgi_data.y_mult = rb->sdfgi->y_mult; + + float cascade_voxel_size = (csize / sdfgi_data.cascade_probe_size[0]); + float occlusion_clamp = (cascade_voxel_size - 0.5) / cascade_voxel_size; + sdfgi_data.occlusion_clamp[0] = occlusion_clamp; + sdfgi_data.occlusion_clamp[1] = occlusion_clamp; + sdfgi_data.occlusion_clamp[2] = occlusion_clamp; + sdfgi_data.normal_bias = (rb->sdfgi->normal_bias / csize) * sdfgi_data.cascade_probe_size[0]; + + //vec2 tex_pixel_size = 1.0 / vec2(ivec2( (OCT_SIZE+2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE+2) * params.probe_axis_size ) ); + //vec3 probe_uv_offset = (ivec3(OCT_SIZE+2,OCT_SIZE+2,(OCT_SIZE+2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + uint32_t oct_size = SDFGI::LIGHTPROBE_OCT_SIZE; + + sdfgi_data.lightprobe_tex_pixel_size[0] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size * sdfgi_data.probe_axis_size); + sdfgi_data.lightprobe_tex_pixel_size[1] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size); + sdfgi_data.lightprobe_tex_pixel_size[2] = 1.0; + + sdfgi_data.energy = rb->sdfgi->energy; + + sdfgi_data.lightprobe_uv_offset[0] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[0]; + sdfgi_data.lightprobe_uv_offset[1] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[1]; + sdfgi_data.lightprobe_uv_offset[2] = float((oct_size + 2) * sdfgi_data.probe_axis_size) * sdfgi_data.lightprobe_tex_pixel_size[0]; + + sdfgi_data.occlusion_renormalize[0] = 0.5; + sdfgi_data.occlusion_renormalize[1] = 1.0; + sdfgi_data.occlusion_renormalize[2] = 1.0 / float(sdfgi_data.max_cascades); + + int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + + for (uint32_t i = 0; i < sdfgi_data.max_cascades; i++) { + GI::SDFGIData::ProbeCascadeData &c = sdfgi_data.cascades[i]; + Vector3 pos = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[i].position)) * rb->sdfgi->cascades[i].cell_size; + Vector3 cam_origin = p_transform.origin; + cam_origin.y *= rb->sdfgi->y_mult; + pos -= cam_origin; //make pos local to camera, to reduce numerical error + c.position[0] = pos.x; + c.position[1] = pos.y; + c.position[2] = pos.z; + c.to_probe = 1.0 / (float(rb->sdfgi->cascade_size) * rb->sdfgi->cascades[i].cell_size / float(rb->sdfgi->probe_axis_count - 1)); + + Vector3i probe_ofs = rb->sdfgi->cascades[i].position / probe_divisor; + c.probe_world_offset[0] = probe_ofs.x; + c.probe_world_offset[1] = probe_ofs.y; + c.probe_world_offset[2] = probe_ofs.z; + + c.to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; + } + + RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data, RD::BARRIER_MASK_COMPUTE); + + /* Update dynamic lights in SDFGI cascades */ + + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + + SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; + uint32_t idx = 0; + for (uint32_t j = 0; j < (uint32_t)render_state.sdfgi_update_data->directional_lights->size(); j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->directional_lights->get(j)); + ERR_CONTINUE(!li); + + if (storage->light_directional_is_sky_only(li->light)) { + continue; + } + + Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); + dir.y *= rb->sdfgi->y_mult; + dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = RS::LIGHT_DIRECTIONAL; + lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + + idx++; + } + + AABB cascade_aabb; + cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; + cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; + + for (uint32_t j = 0; j < render_state.sdfgi_update_data->positional_light_count; j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->positional_light_instances[j]); + ERR_CONTINUE(!li); + + uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); + if (i > max_sdfgi_cascade) { + continue; + } + + if (!cascade_aabb.intersects(li->aabb)) { + continue; + } + + Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); + //faster to not do this here + //dir.y *= rb->sdfgi->y_mult; + //dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Vector3 pos = li->transform.origin; + pos.y *= rb->sdfgi->y_mult; + lights[idx].position[0] = pos.x; + lights[idx].position[1] = pos.y; + lights[idx].position[2] = pos.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = storage->light_get_type(li->light); + lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); + lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); + lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); + lights[idx].inv_spot_attenuation = 1.0f / storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + idx++; + } + + if (idx > 0) { + RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, RD::BARRIER_MASK_COMPUTE); + } + + rb->sdfgi->cascade_dynamic_light_count[i] = idx; + } + } +} + +void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { + RD::get_singleton()->draw_command_begin_label("GI Render"); + + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + Environment *env = environment_owner.getornull(p_environment); + + if (rb->ambient_buffer.is_null() || rb->using_half_size_gi != gi.half_resolution) { + if (rb->ambient_buffer.is_valid()) { + RD::get_singleton()->free(rb->ambient_buffer); + RD::get_singleton()->free(rb->reflection_buffer); + } + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = rb->width; + tf.height = rb->height; + if (gi.half_resolution) { + tf.width >>= 1; + tf.height >>= 1; + } + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->using_half_size_gi = gi.half_resolution; + + _render_buffers_uniform_set_changed(p_render_buffers); + } + + GI::PushConstant push_constant; + + push_constant.screen_size[0] = rb->width; + push_constant.screen_size[1] = rb->height; + push_constant.z_near = p_projection.get_z_near(); + push_constant.z_far = p_projection.get_z_far(); + push_constant.orthogonal = p_projection.is_orthogonal(); + push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); + push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); + push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; + push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; + push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); + push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; + + bool use_sdfgi = rb->sdfgi != nullptr; + bool use_giprobes = push_constant.max_giprobes > 0; + + if (env) { + push_constant.ao_color[0] = env->ao_color.r; + push_constant.ao_color[1] = env->ao_color.g; + push_constant.ao_color[2] = env->ao_color.b; + } else { + push_constant.ao_color[0] = 0; + push_constant.ao_color[1] = 0; + push_constant.ao_color[2] = 0; + } + + push_constant.cam_rotation[0] = p_transform.basis[0][0]; + push_constant.cam_rotation[1] = p_transform.basis[1][0]; + push_constant.cam_rotation[2] = p_transform.basis[2][0]; + push_constant.cam_rotation[3] = 0; + push_constant.cam_rotation[4] = p_transform.basis[0][1]; + push_constant.cam_rotation[5] = p_transform.basis[1][1]; + push_constant.cam_rotation[6] = p_transform.basis[2][1]; + push_constant.cam_rotation[7] = 0; + push_constant.cam_rotation[8] = p_transform.basis[0][2]; + push_constant.cam_rotation[9] = p_transform.basis[1][2]; + push_constant.cam_rotation[10] = p_transform.basis[2][2]; + push_constant.cam_rotation[11] = 0; + + if (rb->gi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->gi_uniform_set)) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[j].sdf_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[j].light_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[j].light_aniso_0_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[j].light_aniso_1_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 5; + if (rb->sdfgi) { + u.ids.push_back(rb->sdfgi->occlusion_texture); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 6; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 7; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 9; + u.ids.push_back(rb->ambient_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 10; + u.ids.push_back(rb->reflection_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 11; + if (rb->sdfgi) { + u.ids.push_back(rb->sdfgi->lightprobe_texture); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 12; + u.ids.push_back(rb->depth_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 13; + u.ids.push_back(p_normal_roughness_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 14; + RID buffer = p_gi_probe_buffer.is_valid() ? p_gi_probe_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + u.ids.push_back(buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 15; + u.ids.push_back(gi.sdfgi_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 16; + u.ids.push_back(rb->giprobe_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 17; + for (int i = 0; i < RenderBuffers::MAX_GIPROBES; i++) { + u.ids.push_back(rb->giprobe_textures[i]); + } + uniforms.push_back(u); + } + + rb->gi_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi.shader.version_get_shader(gi.shader_version, 0), 0); + } + + GI::Mode mode; + + if (rb->using_half_size_gi) { + mode = (use_sdfgi && use_giprobes) ? GI::MODE_HALF_RES_COMBINED : (use_sdfgi ? GI::MODE_HALF_RES_SDFGI : GI::MODE_HALF_RES_GIPROBE); + } else { + mode = (use_sdfgi && use_giprobes) ? GI::MODE_COMBINED : (use_sdfgi ? GI::MODE_SDFGI : GI::MODE_GIPROBE); + } + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[mode]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GI::PushConstant)); + + if (rb->using_half_size_gi) { + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width >> 1, rb->height >> 1, 1); + } else { + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); + } + //do barrier later to allow oeverlap + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //no barriers, let other compute, raster and transfer happen at the same time + RD::get_singleton()->draw_command_end_label(); +} + +RID RendererSceneRenderRD::sky_allocate() { + return sky_owner.allocate_rid(); +} +void RendererSceneRenderRD::sky_initialize(RID p_rid) { + sky_owner.initialize_rid(p_rid, Sky()); +} + +void RendererSceneRenderRD::_sky_invalidate(Sky *p_sky) { + if (!p_sky->dirty) { + p_sky->dirty = true; + p_sky->dirty_list = dirty_sky_list; + dirty_sky_list = p_sky; + } +} + +void RendererSceneRenderRD::sky_set_radiance_size(RID p_sky, int p_radiance_size) { + Sky *sky = sky_owner.getornull(p_sky); + ERR_FAIL_COND(!sky); + ERR_FAIL_COND(p_radiance_size < 32 || p_radiance_size > 2048); + if (sky->radiance_size == p_radiance_size) { + return; + } + sky->radiance_size = p_radiance_size; + + if (sky->mode == RS::SKY_MODE_REALTIME && sky->radiance_size != 256) { + WARN_PRINT("Realtime Skies can only use a radiance size of 256. Radiance size will be set to 256 internally."); + sky->radiance_size = 256; + } + + _sky_invalidate(sky); + if (sky->radiance.is_valid()) { + RD::get_singleton()->free(sky->radiance); + sky->radiance = RID(); + } + _clear_reflection_data(sky->reflection); +} + +void RendererSceneRenderRD::sky_set_mode(RID p_sky, RS::SkyMode p_mode) { + Sky *sky = sky_owner.getornull(p_sky); + ERR_FAIL_COND(!sky); + + if (sky->mode == p_mode) { + return; + } + + sky->mode = p_mode; + + if (sky->mode == RS::SKY_MODE_REALTIME && sky->radiance_size != 256) { + WARN_PRINT("Realtime Skies can only use a radiance size of 256. Radiance size will be set to 256 internally."); + sky_set_radiance_size(p_sky, 256); + } + + _sky_invalidate(sky); + if (sky->radiance.is_valid()) { + RD::get_singleton()->free(sky->radiance); + sky->radiance = RID(); + } + _clear_reflection_data(sky->reflection); +} + +void RendererSceneRenderRD::sky_set_material(RID p_sky, RID p_material) { + Sky *sky = sky_owner.getornull(p_sky); + ERR_FAIL_COND(!sky); + sky->material = p_material; + _sky_invalidate(sky); +} + +Ref<Image> RendererSceneRenderRD::sky_bake_panorama(RID p_sky, float p_energy, bool p_bake_irradiance, const Size2i &p_size) { + Sky *sky = sky_owner.getornull(p_sky); + ERR_FAIL_COND_V(!sky, Ref<Image>()); + + _update_dirty_skys(); + + if (sky->radiance.is_valid()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + tf.width = p_size.width; + tf.height = p_size.height; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + + RID rad_tex = RD::get_singleton()->texture_create(tf, RD::TextureView()); + storage->get_effects()->copy_cubemap_to_panorama(sky->radiance, rad_tex, p_size, p_bake_irradiance ? roughness_layers : 0, sky->reflection.layers.size() > 1); + Vector<uint8_t> data = RD::get_singleton()->texture_get_data(rad_tex, 0); + RD::get_singleton()->free(rad_tex); + + Ref<Image> img; + img.instance(); + img->create(p_size.width, p_size.height, false, Image::FORMAT_RGBAF, data); + for (int i = 0; i < p_size.width; i++) { + for (int j = 0; j < p_size.height; j++) { + Color c = img->get_pixel(i, j); + c.r *= p_energy; + c.g *= p_energy; + c.b *= p_energy; + img->set_pixel(i, j, c); + } + } + return img; + } + + return Ref<Image>(); +} + +void RendererSceneRenderRD::_update_dirty_skys() { + Sky *sky = dirty_sky_list; + + while (sky) { + bool texture_set_dirty = false; + //update sky configuration if texture is missing + + if (sky->radiance.is_null()) { + int mipmaps = Image::get_image_required_mipmaps(sky->radiance_size, sky->radiance_size, Image::FORMAT_RGBAH) + 1; + + uint32_t w = sky->radiance_size, h = sky->radiance_size; + int layers = roughness_layers; + if (sky->mode == RS::SKY_MODE_REALTIME) { + layers = 8; + if (roughness_layers != 8) { + WARN_PRINT("When using REALTIME skies, roughness_layers should be set to 8 in the project settings for best quality reflections"); + } + } + + if (sky_use_cubemap_array) { + //array (higher quality, 6 times more memory) + RD::TextureFormat tf; + tf.array_layers = layers * 6; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.texture_type = RD::TEXTURE_TYPE_CUBE_ARRAY; + tf.mipmaps = mipmaps; + tf.width = w; + tf.height = h; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + + sky->radiance = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + _update_reflection_data(sky->reflection, sky->radiance_size, mipmaps, true, sky->radiance, 0, sky->mode == RS::SKY_MODE_REALTIME); + + } else { + //regular cubemap, lower quality (aliasing, less memory) + RD::TextureFormat tf; + tf.array_layers = 6; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.texture_type = RD::TEXTURE_TYPE_CUBE; + tf.mipmaps = MIN(mipmaps, layers); + tf.width = w; + tf.height = h; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + + sky->radiance = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + _update_reflection_data(sky->reflection, sky->radiance_size, MIN(mipmaps, layers), false, sky->radiance, 0, sky->mode == RS::SKY_MODE_REALTIME); + } + texture_set_dirty = true; + } + + // Create subpass buffers if they haven't been created already + if (sky->half_res_pass.is_null() && !RD::get_singleton()->texture_is_valid(sky->half_res_pass) && sky->screen_size.x >= 4 && sky->screen_size.y >= 4) { + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tformat.width = sky->screen_size.x / 2; + tformat.height = sky->screen_size.y / 2; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_2D; + + sky->half_res_pass = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + Vector<RID> texs; + texs.push_back(sky->half_res_pass); + sky->half_res_framebuffer = RD::get_singleton()->framebuffer_create(texs); + texture_set_dirty = true; + } + + if (sky->quarter_res_pass.is_null() && !RD::get_singleton()->texture_is_valid(sky->quarter_res_pass) && sky->screen_size.x >= 4 && sky->screen_size.y >= 4) { + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tformat.width = sky->screen_size.x / 4; + tformat.height = sky->screen_size.y / 4; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_2D; + + sky->quarter_res_pass = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + Vector<RID> texs; + texs.push_back(sky->quarter_res_pass); + sky->quarter_res_framebuffer = RD::get_singleton()->framebuffer_create(texs); + texture_set_dirty = true; + } + + if (texture_set_dirty) { + for (int i = 0; i < SKY_TEXTURE_SET_MAX; i++) { + if (sky->texture_uniform_sets[i].is_valid() && RD::get_singleton()->uniform_set_is_valid(sky->texture_uniform_sets[i])) { + RD::get_singleton()->free(sky->texture_uniform_sets[i]); + sky->texture_uniform_sets[i] = RID(); + } + } + } + + sky->reflection.dirty = true; + sky->processing_layer = 0; + + Sky *next = sky->dirty_list; + sky->dirty_list = nullptr; + sky->dirty = false; + sky = next; + } + + dirty_sky_list = nullptr; +} + +RID RendererSceneRenderRD::sky_get_radiance_texture_rd(RID p_sky) const { + Sky *sky = sky_owner.getornull(p_sky); + ERR_FAIL_COND_V(!sky, RID()); + + return sky->radiance; +} + +RID RendererSceneRenderRD::sky_get_radiance_uniform_set_rd(RID p_sky, RID p_shader, int p_set) const { + Sky *sky = sky_owner.getornull(p_sky); + ERR_FAIL_COND_V(!sky, RID()); + + if (sky->uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(sky->uniform_set)) { + sky->uniform_set = RID(); + if (sky->radiance.is_valid()) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + u.ids.push_back(sky->radiance); + uniforms.push_back(u); + } + + sky->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, p_shader, p_set); + } + } + + return sky->uniform_set; +} + +RID RendererSceneRenderRD::_get_sky_textures(Sky *p_sky, SkyTextureSetVersion p_version) { + if (p_sky->texture_uniform_sets[p_version].is_valid() && RD::get_singleton()->uniform_set_is_valid(p_sky->texture_uniform_sets[p_version])) { + return p_sky->texture_uniform_sets[p_version]; + } + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + if (p_sky->radiance.is_valid() && p_version <= SKY_TEXTURE_SET_QUARTER_RES) { + u.ids.push_back(p_sky->radiance); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1; // half res + if (p_sky->half_res_pass.is_valid() && p_version != SKY_TEXTURE_SET_HALF_RES && p_version != SKY_TEXTURE_SET_CUBEMAP_HALF_RES) { + if (p_version >= SKY_TEXTURE_SET_CUBEMAP) { + u.ids.push_back(p_sky->reflection.layers[0].views[1]); + } else { + u.ids.push_back(p_sky->half_res_pass); + } + } else { + if (p_version < SKY_TEXTURE_SET_CUBEMAP) { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE)); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 2; // quarter res + if (p_sky->quarter_res_pass.is_valid() && p_version != SKY_TEXTURE_SET_QUARTER_RES && p_version != SKY_TEXTURE_SET_CUBEMAP_QUARTER_RES) { + if (p_version >= SKY_TEXTURE_SET_CUBEMAP) { + u.ids.push_back(p_sky->reflection.layers[0].views[2]); + } else { + u.ids.push_back(p_sky->quarter_res_pass); + } + } else { + if (p_version < SKY_TEXTURE_SET_CUBEMAP) { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE)); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK)); + } + } + uniforms.push_back(u); + } + + p_sky->texture_uniform_sets[p_version] = RD::get_singleton()->uniform_set_create(uniforms, sky_shader.default_shader_rd, SKY_SET_TEXTURES); + return p_sky->texture_uniform_sets[p_version]; +} + +RID RendererSceneRenderRD::sky_get_material(RID p_sky) const { + Sky *sky = sky_owner.getornull(p_sky); + ERR_FAIL_COND_V(!sky, RID()); + + return sky->material; +} + +void RendererSceneRenderRD::_draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform) { + ERR_FAIL_COND(!is_environment(p_environment)); + + SkyMaterialData *material = nullptr; + + Sky *sky = sky_owner.getornull(environment_get_sky(p_environment)); + + RID sky_material; + + RS::EnvironmentBG background = environment_get_background(p_environment); + + if (!(background == RS::ENV_BG_CLEAR_COLOR || background == RS::ENV_BG_COLOR) || sky) { + ERR_FAIL_COND(!sky); + sky_material = sky_get_material(environment_get_sky(p_environment)); + + if (sky_material.is_valid()) { + material = (SkyMaterialData *)storage->material_get_data(sky_material, RendererStorageRD::SHADER_TYPE_SKY); + if (!material || !material->shader_data->valid) { + material = nullptr; + } + } + + if (!material) { + sky_material = sky_shader.default_material; + material = (SkyMaterialData *)storage->material_get_data(sky_material, RendererStorageRD::SHADER_TYPE_SKY); + } + } + + if (background == RS::ENV_BG_CLEAR_COLOR || background == RS::ENV_BG_COLOR) { + sky_material = sky_scene_state.fog_material; + material = (SkyMaterialData *)storage->material_get_data(sky_material, RendererStorageRD::SHADER_TYPE_SKY); + } + + ERR_FAIL_COND(!material); + + SkyShaderData *shader_data = material->shader_data; + + ERR_FAIL_COND(!shader_data); + + Basis sky_transform = environment_get_sky_orientation(p_environment); + sky_transform.invert(); + + float multiplier = environment_get_bg_energy(p_environment); + float custom_fov = environment_get_sky_custom_fov(p_environment); + // Camera + CameraMatrix camera; + + if (custom_fov) { + float near_plane = p_projection.get_z_near(); + float far_plane = p_projection.get_z_far(); + float aspect = p_projection.get_aspect(); + + camera.set_perspective(custom_fov, aspect, near_plane, far_plane); + + } else { + camera = p_projection; + } + + sky_transform = p_transform.basis * sky_transform; + + if (shader_data->uses_quarter_res) { + PipelineCacheRD *pipeline = &shader_data->pipelines[SKY_VERSION_QUARTER_RES]; + + RID texture_uniform_set = _get_sky_textures(sky, SKY_TEXTURE_SET_QUARTER_RES); + + Vector<Color> clear_colors; + clear_colors.push_back(Color(0.0, 0.0, 0.0)); + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(sky->quarter_res_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); + storage->get_effects()->render_sky(draw_list, time, sky->quarter_res_framebuffer, sky_scene_state.uniform_set, sky_scene_state.fog_uniform_set, pipeline, material->uniform_set, texture_uniform_set, camera, sky_transform, multiplier, p_transform.origin); + RD::get_singleton()->draw_list_end(); + } + + if (shader_data->uses_half_res) { + PipelineCacheRD *pipeline = &shader_data->pipelines[SKY_VERSION_HALF_RES]; + + RID texture_uniform_set = _get_sky_textures(sky, SKY_TEXTURE_SET_HALF_RES); + + Vector<Color> clear_colors; + clear_colors.push_back(Color(0.0, 0.0, 0.0)); + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(sky->half_res_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); + storage->get_effects()->render_sky(draw_list, time, sky->half_res_framebuffer, sky_scene_state.uniform_set, sky_scene_state.fog_uniform_set, pipeline, material->uniform_set, texture_uniform_set, camera, sky_transform, multiplier, p_transform.origin); + RD::get_singleton()->draw_list_end(); + } + + PipelineCacheRD *pipeline = &shader_data->pipelines[SKY_VERSION_BACKGROUND]; + + RID texture_uniform_set; + if (sky) { + texture_uniform_set = _get_sky_textures(sky, SKY_TEXTURE_SET_BACKGROUND); + } else { + texture_uniform_set = sky_scene_state.fog_only_texture_uniform_set; + } + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_fb, RD::INITIAL_ACTION_CONTINUE, p_can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, p_can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + storage->get_effects()->render_sky(draw_list, time, p_fb, sky_scene_state.uniform_set, sky_scene_state.fog_uniform_set, pipeline, material->uniform_set, texture_uniform_set, camera, sky_transform, multiplier, p_transform.origin); + RD::get_singleton()->draw_list_end(); +} + +void RendererSceneRenderRD::_setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size) { + ERR_FAIL_COND(!is_environment(p_environment)); + + SkyMaterialData *material = nullptr; + + Sky *sky = sky_owner.getornull(environment_get_sky(p_environment)); + + RID sky_material; + + SkyShaderData *shader_data = nullptr; + + RS::EnvironmentBG background = environment_get_background(p_environment); + + if (!(background == RS::ENV_BG_CLEAR_COLOR || background == RS::ENV_BG_COLOR) || sky) { + ERR_FAIL_COND(!sky); + sky_material = sky_get_material(environment_get_sky(p_environment)); + + if (sky_material.is_valid()) { + material = (SkyMaterialData *)storage->material_get_data(sky_material, RendererStorageRD::SHADER_TYPE_SKY); + if (!material || !material->shader_data->valid) { + material = nullptr; + } + } + + if (!material) { + sky_material = sky_shader.default_material; + material = (SkyMaterialData *)storage->material_get_data(sky_material, RendererStorageRD::SHADER_TYPE_SKY); + } + + ERR_FAIL_COND(!material); + + shader_data = material->shader_data; + + ERR_FAIL_COND(!shader_data); + } + + if (sky) { + // Invalidate supbass buffers if screen size changes + if (sky->screen_size != p_screen_size) { + sky->screen_size = p_screen_size; + sky->screen_size.x = sky->screen_size.x < 4 ? 4 : sky->screen_size.x; + sky->screen_size.y = sky->screen_size.y < 4 ? 4 : sky->screen_size.y; + if (shader_data->uses_half_res) { + if (sky->half_res_pass.is_valid()) { + RD::get_singleton()->free(sky->half_res_pass); + sky->half_res_pass = RID(); + } + _sky_invalidate(sky); + } + if (shader_data->uses_quarter_res) { + if (sky->quarter_res_pass.is_valid()) { + RD::get_singleton()->free(sky->quarter_res_pass); + sky->quarter_res_pass = RID(); + } + _sky_invalidate(sky); + } + } + + // Create new subpass buffers if necessary + if ((shader_data->uses_half_res && sky->half_res_pass.is_null()) || + (shader_data->uses_quarter_res && sky->quarter_res_pass.is_null()) || + sky->radiance.is_null()) { + _sky_invalidate(sky); + _update_dirty_skys(); + } + + if (shader_data->uses_time && time - sky->prev_time > 0.00001) { + sky->prev_time = time; + sky->reflection.dirty = true; + RenderingServerDefault::redraw_request(); + } + + if (material != sky->prev_material) { + sky->prev_material = material; + sky->reflection.dirty = true; + } + + if (material->uniform_set_updated) { + material->uniform_set_updated = false; + sky->reflection.dirty = true; + } + + if (!p_transform.origin.is_equal_approx(sky->prev_position) && shader_data->uses_position) { + sky->prev_position = p_transform.origin; + sky->reflection.dirty = true; + } + + if (shader_data->uses_light) { + // Check whether the directional_light_buffer changes + bool light_data_dirty = false; + + if (sky_scene_state.ubo.directional_light_count != sky_scene_state.last_frame_directional_light_count) { + light_data_dirty = true; + for (uint32_t i = sky_scene_state.ubo.directional_light_count; i < sky_scene_state.max_directional_lights; i++) { + sky_scene_state.directional_lights[i].enabled = false; + } + } + if (!light_data_dirty) { + for (uint32_t i = 0; i < sky_scene_state.ubo.directional_light_count; i++) { + if (sky_scene_state.directional_lights[i].direction[0] != sky_scene_state.last_frame_directional_lights[i].direction[0] || + sky_scene_state.directional_lights[i].direction[1] != sky_scene_state.last_frame_directional_lights[i].direction[1] || + sky_scene_state.directional_lights[i].direction[2] != sky_scene_state.last_frame_directional_lights[i].direction[2] || + sky_scene_state.directional_lights[i].energy != sky_scene_state.last_frame_directional_lights[i].energy || + sky_scene_state.directional_lights[i].color[0] != sky_scene_state.last_frame_directional_lights[i].color[0] || + sky_scene_state.directional_lights[i].color[1] != sky_scene_state.last_frame_directional_lights[i].color[1] || + sky_scene_state.directional_lights[i].color[2] != sky_scene_state.last_frame_directional_lights[i].color[2] || + sky_scene_state.directional_lights[i].enabled != sky_scene_state.last_frame_directional_lights[i].enabled || + sky_scene_state.directional_lights[i].size != sky_scene_state.last_frame_directional_lights[i].size) { + light_data_dirty = true; + break; + } + } + } + + if (light_data_dirty) { + RD::get_singleton()->buffer_update(sky_scene_state.directional_light_buffer, 0, sizeof(SkyDirectionalLightData) * sky_scene_state.max_directional_lights, sky_scene_state.directional_lights); + + RendererSceneRenderRD::SkyDirectionalLightData *temp = sky_scene_state.last_frame_directional_lights; + sky_scene_state.last_frame_directional_lights = sky_scene_state.directional_lights; + sky_scene_state.directional_lights = temp; + sky_scene_state.last_frame_directional_light_count = sky_scene_state.ubo.directional_light_count; + sky->reflection.dirty = true; + } + } + } + + //setup fog variables + sky_scene_state.ubo.volumetric_fog_enabled = false; + if (p_render_buffers.is_valid()) { + if (render_buffers_has_volumetric_fog(p_render_buffers)) { + sky_scene_state.ubo.volumetric_fog_enabled = true; + + float fog_end = render_buffers_get_volumetric_fog_end(p_render_buffers); + if (fog_end > 0.0) { + sky_scene_state.ubo.volumetric_fog_inv_length = 1.0 / fog_end; + } else { + sky_scene_state.ubo.volumetric_fog_inv_length = 1.0; + } + + float fog_detail_spread = render_buffers_get_volumetric_fog_detail_spread(p_render_buffers); //reverse lookup + if (fog_detail_spread > 0.0) { + sky_scene_state.ubo.volumetric_fog_detail_spread = 1.0 / fog_detail_spread; + } else { + sky_scene_state.ubo.volumetric_fog_detail_spread = 1.0; + } + } + + RID fog_uniform_set = render_buffers_get_volumetric_fog_sky_uniform_set(p_render_buffers); + + if (fog_uniform_set != RID()) { + sky_scene_state.fog_uniform_set = fog_uniform_set; + } else { + sky_scene_state.fog_uniform_set = sky_scene_state.default_fog_uniform_set; + } + } + + sky_scene_state.ubo.z_far = p_projection.get_z_far(); + sky_scene_state.ubo.fog_enabled = environment_is_fog_enabled(p_environment); + sky_scene_state.ubo.fog_density = environment_get_fog_density(p_environment); + sky_scene_state.ubo.fog_aerial_perspective = environment_get_fog_aerial_perspective(p_environment); + Color fog_color = environment_get_fog_light_color(p_environment).to_linear(); + float fog_energy = environment_get_fog_light_energy(p_environment); + sky_scene_state.ubo.fog_light_color[0] = fog_color.r * fog_energy; + sky_scene_state.ubo.fog_light_color[1] = fog_color.g * fog_energy; + sky_scene_state.ubo.fog_light_color[2] = fog_color.b * fog_energy; + sky_scene_state.ubo.fog_sun_scatter = environment_get_fog_sun_scatter(p_environment); + + RD::get_singleton()->buffer_update(sky_scene_state.uniform_buffer, 0, sizeof(SkySceneState::UBO), &sky_scene_state.ubo); +} + +void RendererSceneRenderRD::_update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform) { + ERR_FAIL_COND(!is_environment(p_environment)); + + Sky *sky = sky_owner.getornull(environment_get_sky(p_environment)); + ERR_FAIL_COND(!sky); + + RID sky_material = sky_get_material(environment_get_sky(p_environment)); + + SkyMaterialData *material = nullptr; + + if (sky_material.is_valid()) { + material = (SkyMaterialData *)storage->material_get_data(sky_material, RendererStorageRD::SHADER_TYPE_SKY); + if (!material || !material->shader_data->valid) { + material = nullptr; + } + } + + if (!material) { + sky_material = sky_shader.default_material; + material = (SkyMaterialData *)storage->material_get_data(sky_material, RendererStorageRD::SHADER_TYPE_SKY); + } + + ERR_FAIL_COND(!material); + + SkyShaderData *shader_data = material->shader_data; + + ERR_FAIL_COND(!shader_data); + + float multiplier = environment_get_bg_energy(p_environment); + + bool update_single_frame = sky->mode == RS::SKY_MODE_REALTIME || sky->mode == RS::SKY_MODE_QUALITY; + RS::SkyMode sky_mode = sky->mode; + + if (sky_mode == RS::SKY_MODE_AUTOMATIC) { + if (shader_data->uses_time || shader_data->uses_position) { + update_single_frame = true; + sky_mode = RS::SKY_MODE_REALTIME; + } else if (shader_data->uses_light || shader_data->ubo_size > 0) { + update_single_frame = false; + sky_mode = RS::SKY_MODE_INCREMENTAL; + } else { + update_single_frame = true; + sky_mode = RS::SKY_MODE_QUALITY; + } + } + + if (sky->processing_layer == 0 && sky_mode == RS::SKY_MODE_INCREMENTAL) { + // On the first frame after creating sky, rebuild in single frame + update_single_frame = true; + sky_mode = RS::SKY_MODE_QUALITY; + } + + int max_processing_layer = sky_use_cubemap_array ? sky->reflection.layers.size() : sky->reflection.layers[0].mipmaps.size(); + + // Update radiance cubemap + if (sky->reflection.dirty && (sky->processing_layer >= max_processing_layer || update_single_frame)) { + static const Vector3 view_normals[6] = { + Vector3(+1, 0, 0), + Vector3(-1, 0, 0), + Vector3(0, +1, 0), + Vector3(0, -1, 0), + Vector3(0, 0, +1), + Vector3(0, 0, -1) + }; + static const Vector3 view_up[6] = { + Vector3(0, -1, 0), + Vector3(0, -1, 0), + Vector3(0, 0, +1), + Vector3(0, 0, -1), + Vector3(0, -1, 0), + Vector3(0, -1, 0) + }; + + CameraMatrix cm; + cm.set_perspective(90, 1, 0.01, 10.0); + CameraMatrix correction; + correction.set_depth_correction(true); + cm = correction * cm; + + if (shader_data->uses_quarter_res) { + PipelineCacheRD *pipeline = &shader_data->pipelines[SKY_VERSION_CUBEMAP_QUARTER_RES]; + + Vector<Color> clear_colors; + clear_colors.push_back(Color(0.0, 0.0, 0.0)); + RD::DrawListID cubemap_draw_list; + + for (int i = 0; i < 6; i++) { + Transform local_view; + local_view.set_look_at(Vector3(0, 0, 0), view_normals[i], view_up[i]); + RID texture_uniform_set = _get_sky_textures(sky, SKY_TEXTURE_SET_CUBEMAP_QUARTER_RES); + + cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[2].framebuffers[i], RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + storage->get_effects()->render_sky(cubemap_draw_list, time, sky->reflection.layers[0].mipmaps[2].framebuffers[i], sky_scene_state.uniform_set, sky_scene_state.fog_uniform_set, pipeline, material->uniform_set, texture_uniform_set, cm, local_view.basis, multiplier, p_transform.origin); + RD::get_singleton()->draw_list_end(); + } + } + + if (shader_data->uses_half_res) { + PipelineCacheRD *pipeline = &shader_data->pipelines[SKY_VERSION_CUBEMAP_HALF_RES]; + + Vector<Color> clear_colors; + clear_colors.push_back(Color(0.0, 0.0, 0.0)); + RD::DrawListID cubemap_draw_list; + + for (int i = 0; i < 6; i++) { + Transform local_view; + local_view.set_look_at(Vector3(0, 0, 0), view_normals[i], view_up[i]); + RID texture_uniform_set = _get_sky_textures(sky, SKY_TEXTURE_SET_CUBEMAP_HALF_RES); + + cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[1].framebuffers[i], RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + storage->get_effects()->render_sky(cubemap_draw_list, time, sky->reflection.layers[0].mipmaps[1].framebuffers[i], sky_scene_state.uniform_set, sky_scene_state.fog_uniform_set, pipeline, material->uniform_set, texture_uniform_set, cm, local_view.basis, multiplier, p_transform.origin); + RD::get_singleton()->draw_list_end(); + } + } + + RD::DrawListID cubemap_draw_list; + PipelineCacheRD *pipeline = &shader_data->pipelines[SKY_VERSION_CUBEMAP]; + + for (int i = 0; i < 6; i++) { + Transform local_view; + local_view.set_look_at(Vector3(0, 0, 0), view_normals[i], view_up[i]); + RID texture_uniform_set = _get_sky_textures(sky, SKY_TEXTURE_SET_CUBEMAP); + + cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[0].framebuffers[i], RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + storage->get_effects()->render_sky(cubemap_draw_list, time, sky->reflection.layers[0].mipmaps[0].framebuffers[i], sky_scene_state.uniform_set, sky_scene_state.fog_uniform_set, pipeline, material->uniform_set, texture_uniform_set, cm, local_view.basis, multiplier, p_transform.origin); + RD::get_singleton()->draw_list_end(); + } + + if (sky_mode == RS::SKY_MODE_REALTIME) { + _create_reflection_fast_filter(sky->reflection, sky_use_cubemap_array); + if (sky_use_cubemap_array) { + _update_reflection_mipmaps(sky->reflection, 0, sky->reflection.layers.size()); + } + } else { + if (update_single_frame) { + for (int i = 1; i < max_processing_layer; i++) { + _create_reflection_importance_sample(sky->reflection, sky_use_cubemap_array, 10, i); + } + if (sky_use_cubemap_array) { + _update_reflection_mipmaps(sky->reflection, 0, sky->reflection.layers.size()); + } + } else { + if (sky_use_cubemap_array) { + // Multi-Frame so just update the first array level + _update_reflection_mipmaps(sky->reflection, 0, 1); + } + } + sky->processing_layer = 1; + } + + sky->reflection.dirty = false; + + } else { + if (sky_mode == RS::SKY_MODE_INCREMENTAL && sky->processing_layer < max_processing_layer) { + _create_reflection_importance_sample(sky->reflection, sky_use_cubemap_array, 10, sky->processing_layer); + + if (sky_use_cubemap_array) { + _update_reflection_mipmaps(sky->reflection, sky->processing_layer, sky->processing_layer + 1); + } + + sky->processing_layer++; + } + } +} + +/* SKY SHADER */ + +void RendererSceneRenderRD::SkyShaderData::set_code(const String &p_code) { + //compile + + code = p_code; + valid = false; + ubo_size = 0; + uniforms.clear(); + + if (code == String()) { + return; //just invalid, but no error + } + + ShaderCompilerRD::GeneratedCode gen_code; + ShaderCompilerRD::IdentifierActions actions; + + uses_time = false; + uses_half_res = false; + uses_quarter_res = false; + uses_position = false; + uses_light = false; + + actions.render_mode_flags["use_half_res_pass"] = &uses_half_res; + actions.render_mode_flags["use_quarter_res_pass"] = &uses_quarter_res; + + actions.usage_flag_pointers["TIME"] = &uses_time; + actions.usage_flag_pointers["POSITION"] = &uses_position; + actions.usage_flag_pointers["LIGHT0_ENABLED"] = &uses_light; + actions.usage_flag_pointers["LIGHT0_ENERGY"] = &uses_light; + actions.usage_flag_pointers["LIGHT0_DIRECTION"] = &uses_light; + actions.usage_flag_pointers["LIGHT0_COLOR"] = &uses_light; + actions.usage_flag_pointers["LIGHT0_SIZE"] = &uses_light; + actions.usage_flag_pointers["LIGHT1_ENABLED"] = &uses_light; + actions.usage_flag_pointers["LIGHT1_ENERGY"] = &uses_light; + actions.usage_flag_pointers["LIGHT1_DIRECTION"] = &uses_light; + actions.usage_flag_pointers["LIGHT1_COLOR"] = &uses_light; + actions.usage_flag_pointers["LIGHT1_SIZE"] = &uses_light; + actions.usage_flag_pointers["LIGHT2_ENABLED"] = &uses_light; + actions.usage_flag_pointers["LIGHT2_ENERGY"] = &uses_light; + actions.usage_flag_pointers["LIGHT2_DIRECTION"] = &uses_light; + actions.usage_flag_pointers["LIGHT2_COLOR"] = &uses_light; + actions.usage_flag_pointers["LIGHT2_SIZE"] = &uses_light; + actions.usage_flag_pointers["LIGHT3_ENABLED"] = &uses_light; + actions.usage_flag_pointers["LIGHT3_ENERGY"] = &uses_light; + actions.usage_flag_pointers["LIGHT3_DIRECTION"] = &uses_light; + actions.usage_flag_pointers["LIGHT3_COLOR"] = &uses_light; + actions.usage_flag_pointers["LIGHT3_SIZE"] = &uses_light; + + actions.uniforms = &uniforms; + + RendererSceneRenderRD *scene_singleton = (RendererSceneRenderRD *)RendererSceneRenderRD::singleton; + + Error err = scene_singleton->sky_shader.compiler.compile(RS::SHADER_SKY, code, &actions, path, gen_code); + + ERR_FAIL_COND(err != OK); + + if (version.is_null()) { + version = scene_singleton->sky_shader.shader.version_create(); + } + +#if 0 + print_line("**compiling shader:"); + print_line("**defines:\n"); + for (int i = 0; i < gen_code.defines.size(); i++) { + print_line(gen_code.defines[i]); + } + print_line("\n**uniforms:\n" + gen_code.uniforms); + // print_line("\n**vertex_globals:\n" + gen_code.vertex_global); + // print_line("\n**vertex_code:\n" + gen_code.vertex); + print_line("\n**fragment_globals:\n" + gen_code.fragment_global); + print_line("\n**fragment_code:\n" + gen_code.fragment); + print_line("\n**light_code:\n" + gen_code.light); +#endif + + scene_singleton->sky_shader.shader.version_set_code(version, gen_code.uniforms, gen_code.vertex_global, gen_code.vertex, gen_code.fragment_global, gen_code.light, gen_code.fragment, gen_code.defines); + ERR_FAIL_COND(!scene_singleton->sky_shader.shader.version_is_valid(version)); + + ubo_size = gen_code.uniform_total_size; + ubo_offsets = gen_code.uniform_offsets; + texture_uniforms = gen_code.texture_uniforms; + + //update pipelines + + for (int i = 0; i < SKY_VERSION_MAX; i++) { + RD::PipelineDepthStencilState depth_stencil_state; + depth_stencil_state.enable_depth_test = true; + depth_stencil_state.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; + + RID shader_variant = scene_singleton->sky_shader.shader.version_get_shader(version, i); + pipelines[i].setup(shader_variant, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), depth_stencil_state, RD::PipelineColorBlendState::create_disabled(), 0); + } + + valid = true; +} + +void RendererSceneRenderRD::SkyShaderData::set_default_texture_param(const StringName &p_name, RID p_texture) { + if (!p_texture.is_valid()) { + default_texture_params.erase(p_name); + } else { + default_texture_params[p_name] = p_texture; + } +} + +void RendererSceneRenderRD::SkyShaderData::get_param_list(List<PropertyInfo> *p_param_list) const { + Map<int, StringName> order; + + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL || E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + if (E->get().texture_order >= 0) { + order[E->get().texture_order + 100000] = E->key(); + } else { + order[E->get().order] = E->key(); + } + } + + for (Map<int, StringName>::Element *E = order.front(); E; E = E->next()) { + PropertyInfo pi = ShaderLanguage::uniform_to_property_info(uniforms[E->get()]); + pi.name = E->get(); + p_param_list->push_back(pi); + } +} + +void RendererSceneRenderRD::SkyShaderData::get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const { + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + RendererStorage::InstanceShaderParam p; + p.info = ShaderLanguage::uniform_to_property_info(E->get()); + p.info.name = E->key(); //supply name + p.index = E->get().instance_index; + p.default_value = ShaderLanguage::constant_value_to_variant(E->get().default_value, E->get().type, E->get().hint); + p_param_list->push_back(p); + } +} + +bool RendererSceneRenderRD::SkyShaderData::is_param_texture(const StringName &p_param) const { + if (!uniforms.has(p_param)) { + return false; + } + + return uniforms[p_param].texture_order >= 0; +} + +bool RendererSceneRenderRD::SkyShaderData::is_animated() const { + return false; +} + +bool RendererSceneRenderRD::SkyShaderData::casts_shadows() const { + return false; +} + +Variant RendererSceneRenderRD::SkyShaderData::get_default_parameter(const StringName &p_parameter) const { + if (uniforms.has(p_parameter)) { + ShaderLanguage::ShaderNode::Uniform uniform = uniforms[p_parameter]; + Vector<ShaderLanguage::ConstantNode::Value> default_value = uniform.default_value; + return ShaderLanguage::constant_value_to_variant(default_value, uniform.type, uniform.hint); + } + return Variant(); +} + +RS::ShaderNativeSourceCode RendererSceneRenderRD::SkyShaderData::get_native_source_code() const { + RendererSceneRenderRD *scene_singleton = (RendererSceneRenderRD *)RendererSceneRenderRD::singleton; + + return scene_singleton->sky_shader.shader.version_get_native_source_code(version); +} + +RendererSceneRenderRD::SkyShaderData::SkyShaderData() { + valid = false; +} + +RendererSceneRenderRD::SkyShaderData::~SkyShaderData() { + RendererSceneRenderRD *scene_singleton = (RendererSceneRenderRD *)RendererSceneRenderRD::singleton; + ERR_FAIL_COND(!scene_singleton); + //pipeline variants will clear themselves if shader is gone + if (version.is_valid()) { + scene_singleton->sky_shader.shader.version_free(version); + } +} + +RendererStorageRD::ShaderData *RendererSceneRenderRD::_create_sky_shader_func() { + SkyShaderData *shader_data = memnew(SkyShaderData); + return shader_data; +} + +void RendererSceneRenderRD::SkyMaterialData::update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { + RendererSceneRenderRD *scene_singleton = (RendererSceneRenderRD *)RendererSceneRenderRD::singleton; + + uniform_set_updated = true; + + if ((uint32_t)ubo_data.size() != shader_data->ubo_size) { + p_uniform_dirty = true; + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + uniform_buffer = RID(); + } + + ubo_data.resize(shader_data->ubo_size); + if (ubo_data.size()) { + uniform_buffer = RD::get_singleton()->uniform_buffer_create(ubo_data.size()); + memset(ubo_data.ptrw(), 0, ubo_data.size()); //clear + } + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + //check whether buffer changed + if (p_uniform_dirty && ubo_data.size()) { + update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); + RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); + } + + uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); + + if ((uint32_t)texture_cache.size() != tex_uniform_count) { + texture_cache.resize(tex_uniform_count); + p_textures_dirty = true; + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + if (p_textures_dirty && tex_uniform_count) { + update_textures(p_parameters, shader_data->default_texture_params, shader_data->texture_uniforms, texture_cache.ptrw(), true); + } + + if (shader_data->ubo_size == 0 && shader_data->texture_uniforms.size() == 0) { + // This material does not require an uniform set, so don't create it. + return; + } + + if (!p_textures_dirty && uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + //no reason to update uniform set, only UBO (or nothing) was needed to update + return; + } + + Vector<RD::Uniform> uniforms; + + { + if (shader_data->ubo_size) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 0; + u.ids.push_back(uniform_buffer); + uniforms.push_back(u); + } + + const RID *textures = texture_cache.ptrw(); + for (uint32_t i = 0; i < tex_uniform_count; i++) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1 + i; + u.ids.push_back(textures[i]); + uniforms.push_back(u); + } + } + + uniform_set = RD::get_singleton()->uniform_set_create(uniforms, scene_singleton->sky_shader.shader.version_get_shader(shader_data->version, 0), SKY_SET_MATERIAL); +} + +RendererSceneRenderRD::SkyMaterialData::~SkyMaterialData() { + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + } + + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + } +} + +RendererStorageRD::MaterialData *RendererSceneRenderRD::_create_sky_material_func(SkyShaderData *p_shader) { + SkyMaterialData *material_data = memnew(SkyMaterialData); + material_data->shader_data = p_shader; + material_data->last_frame = false; + //update will happen later anyway so do nothing. + return material_data; +} + +RID RendererSceneRenderRD::environment_allocate() { + return environment_owner.allocate_rid(); +} +void RendererSceneRenderRD::environment_initialize(RID p_rid) { + environment_owner.initialize_rid(p_rid, Environment()); +} + +void RendererSceneRenderRD::environment_set_background(RID p_env, RS::EnvironmentBG p_bg) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->background = p_bg; +} + +void RendererSceneRenderRD::environment_set_sky(RID p_env, RID p_sky) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->sky = p_sky; +} + +void RendererSceneRenderRD::environment_set_sky_custom_fov(RID p_env, float p_scale) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->sky_custom_fov = p_scale; +} + +void RendererSceneRenderRD::environment_set_sky_orientation(RID p_env, const Basis &p_orientation) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->sky_orientation = p_orientation; +} + +void RendererSceneRenderRD::environment_set_bg_color(RID p_env, const Color &p_color) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->bg_color = p_color; +} + +void RendererSceneRenderRD::environment_set_bg_energy(RID p_env, float p_energy) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->bg_energy = p_energy; +} + +void RendererSceneRenderRD::environment_set_canvas_max_layer(RID p_env, int p_max_layer) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->canvas_max_layer = p_max_layer; +} + +void RendererSceneRenderRD::environment_set_ambient_light(RID p_env, const Color &p_color, RS::EnvironmentAmbientSource p_ambient, float p_energy, float p_sky_contribution, RS::EnvironmentReflectionSource p_reflection_source, const Color &p_ao_color) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->ambient_light = p_color; + env->ambient_source = p_ambient; + env->ambient_light_energy = p_energy; + env->ambient_sky_contribution = p_sky_contribution; + env->reflection_source = p_reflection_source; + env->ao_color = p_ao_color; +} + +RS::EnvironmentBG RendererSceneRenderRD::environment_get_background(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, RS::ENV_BG_MAX); + return env->background; +} + +RID RendererSceneRenderRD::environment_get_sky(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, RID()); + return env->sky; +} + +float RendererSceneRenderRD::environment_get_sky_custom_fov(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->sky_custom_fov; +} + +Basis RendererSceneRenderRD::environment_get_sky_orientation(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, Basis()); + return env->sky_orientation; +} + +Color RendererSceneRenderRD::environment_get_bg_color(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, Color()); + return env->bg_color; +} + +float RendererSceneRenderRD::environment_get_bg_energy(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->bg_energy; +} + +int RendererSceneRenderRD::environment_get_canvas_max_layer(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->canvas_max_layer; +} + +Color RendererSceneRenderRD::environment_get_ambient_light_color(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, Color()); + return env->ambient_light; +} + +RS::EnvironmentAmbientSource RendererSceneRenderRD::environment_get_ambient_source(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, RS::ENV_AMBIENT_SOURCE_BG); + return env->ambient_source; +} + +float RendererSceneRenderRD::environment_get_ambient_light_energy(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->ambient_light_energy; +} + +float RendererSceneRenderRD::environment_get_ambient_sky_contribution(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->ambient_sky_contribution; +} + +RS::EnvironmentReflectionSource RendererSceneRenderRD::environment_get_reflection_source(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, RS::ENV_REFLECTION_SOURCE_DISABLED); + return env->reflection_source; +} + +Color RendererSceneRenderRD::environment_get_ao_color(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, Color()); + return env->ao_color; +} + +void RendererSceneRenderRD::environment_set_tonemap(RID p_env, RS::EnvironmentToneMapper p_tone_mapper, float p_exposure, float p_white, bool p_auto_exposure, float p_min_luminance, float p_max_luminance, float p_auto_exp_speed, float p_auto_exp_scale) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->exposure = p_exposure; + env->tone_mapper = p_tone_mapper; + if (!env->auto_exposure && p_auto_exposure) { + env->auto_exposure_version = ++auto_exposure_counter; + } + env->auto_exposure = p_auto_exposure; + env->white = p_white; + env->min_luminance = p_min_luminance; + env->max_luminance = p_max_luminance; + env->auto_exp_speed = p_auto_exp_speed; + env->auto_exp_scale = p_auto_exp_scale; +} + +void RendererSceneRenderRD::environment_set_glow(RID p_env, bool p_enable, Vector<float> p_levels, float p_intensity, float p_strength, float p_mix, float p_bloom_threshold, RS::EnvironmentGlowBlendMode p_blend_mode, float p_hdr_bleed_threshold, float p_hdr_bleed_scale, float p_hdr_luminance_cap) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + ERR_FAIL_COND_MSG(p_levels.size() != 7, "Size of array of glow levels must be 7"); + env->glow_enabled = p_enable; + env->glow_levels = p_levels; + env->glow_intensity = p_intensity; + env->glow_strength = p_strength; + env->glow_mix = p_mix; + env->glow_bloom = p_bloom_threshold; + env->glow_blend_mode = p_blend_mode; + env->glow_hdr_bleed_threshold = p_hdr_bleed_threshold; + env->glow_hdr_bleed_scale = p_hdr_bleed_scale; + env->glow_hdr_luminance_cap = p_hdr_luminance_cap; +} + +void RendererSceneRenderRD::environment_glow_set_use_bicubic_upscale(bool p_enable) { + glow_bicubic_upscale = p_enable; +} + +void RendererSceneRenderRD::environment_glow_set_use_high_quality(bool p_enable) { + glow_high_quality = p_enable; +} + +void RendererSceneRenderRD::environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, float p_bounce_feedback, bool p_read_sky, float p_energy, float p_normal_bias, float p_probe_bias) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + if (low_end) { + return; + } + + env->sdfgi_enabled = p_enable; + env->sdfgi_cascades = p_cascades; + env->sdfgi_min_cell_size = p_min_cell_size; + env->sdfgi_use_occlusion = p_use_occlusion; + env->sdfgi_bounce_feedback = p_bounce_feedback; + env->sdfgi_read_sky_light = p_read_sky; + env->sdfgi_energy = p_energy; + env->sdfgi_normal_bias = p_normal_bias; + env->sdfgi_probe_bias = p_probe_bias; + env->sdfgi_y_scale = p_y_scale; +} + +void RendererSceneRenderRD::environment_set_fog(RID p_env, bool p_enable, const Color &p_light_color, float p_light_energy, float p_sun_scatter, float p_density, float p_height, float p_height_density, float p_fog_aerial_perspective) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + env->fog_enabled = p_enable; + env->fog_light_color = p_light_color; + env->fog_light_energy = p_light_energy; + env->fog_sun_scatter = p_sun_scatter; + env->fog_density = p_density; + env->fog_height = p_height; + env->fog_height_density = p_height_density; + env->fog_aerial_perspective = p_fog_aerial_perspective; +} + +bool RendererSceneRenderRD::environment_is_fog_enabled(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, false); + + return env->fog_enabled; +} +Color RendererSceneRenderRD::environment_get_fog_light_color(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, Color()); + return env->fog_light_color; +} +float RendererSceneRenderRD::environment_get_fog_light_energy(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->fog_light_energy; +} +float RendererSceneRenderRD::environment_get_fog_sun_scatter(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->fog_sun_scatter; +} +float RendererSceneRenderRD::environment_get_fog_density(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->fog_density; +} +float RendererSceneRenderRD::environment_get_fog_height(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + + return env->fog_height; +} +float RendererSceneRenderRD::environment_get_fog_height_density(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->fog_height_density; +} + +float RendererSceneRenderRD::environment_get_fog_aerial_perspective(RID p_env) const { + const Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0); + return env->fog_aerial_perspective; +} + +void RendererSceneRenderRD::environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, const Color &p_light, float p_light_energy, float p_length, float p_detail_spread, float p_gi_inject, bool p_temporal_reprojection, float p_temporal_reprojection_amount) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + if (low_end) { + return; + } + + env->volumetric_fog_enabled = p_enable; + env->volumetric_fog_density = p_density; + env->volumetric_fog_light = p_light; + env->volumetric_fog_light_energy = p_light_energy; + env->volumetric_fog_length = p_length; + env->volumetric_fog_detail_spread = p_detail_spread; + env->volumetric_fog_gi_inject = p_gi_inject; + env->volumetric_fog_temporal_reprojection = p_temporal_reprojection; + env->volumetric_fog_temporal_reprojection_amount = p_temporal_reprojection_amount; +} + +void RendererSceneRenderRD::environment_set_volumetric_fog_volume_size(int p_size, int p_depth) { + volumetric_fog_size = p_size; + volumetric_fog_depth = p_depth; +} + +void RendererSceneRenderRD::environment_set_volumetric_fog_filter_active(bool p_enable) { + volumetric_fog_filter_active = p_enable; +} + +void RendererSceneRenderRD::environment_set_sdfgi_ray_count(RS::EnvironmentSDFGIRayCount p_ray_count) { + sdfgi_ray_count = p_ray_count; +} + +void RendererSceneRenderRD::environment_set_sdfgi_frames_to_converge(RS::EnvironmentSDFGIFramesToConverge p_frames) { + sdfgi_frames_to_converge = p_frames; +} +void RendererSceneRenderRD::environment_set_sdfgi_frames_to_update_light(RS::EnvironmentSDFGIFramesToUpdateLight p_update) { + sdfgi_frames_to_update_light = p_update; +} + +void RendererSceneRenderRD::environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + if (low_end) { + return; + } + + env->ssr_enabled = p_enable; + env->ssr_max_steps = p_max_steps; + env->ssr_fade_in = p_fade_int; + env->ssr_fade_out = p_fade_out; + env->ssr_depth_tolerance = p_depth_tolerance; +} + +void RendererSceneRenderRD::environment_set_ssr_roughness_quality(RS::EnvironmentSSRRoughnessQuality p_quality) { + ssr_roughness_quality = p_quality; +} + +RS::EnvironmentSSRRoughnessQuality RendererSceneRenderRD::environment_get_ssr_roughness_quality() const { + return ssr_roughness_quality; +} + +void RendererSceneRenderRD::environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_power, float p_detail, float p_horizon, float p_sharpness, float p_light_affect, float p_ao_channel_affect) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + if (low_end) { + return; + } + + env->ssao_enabled = p_enable; + env->ssao_radius = p_radius; + env->ssao_intensity = p_intensity; + env->ssao_power = p_power; + env->ssao_detail = p_detail; + env->ssao_horizon = p_horizon; + env->ssao_sharpness = p_sharpness; + env->ssao_direct_light_affect = p_light_affect; + env->ssao_ao_channel_affect = p_ao_channel_affect; +} + +void RendererSceneRenderRD::environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size, float p_adaptive_target, int p_blur_passes, float p_fadeout_from, float p_fadeout_to) { + ssao_quality = p_quality; + ssao_half_size = p_half_size; + ssao_adaptive_target = p_adaptive_target; + ssao_blur_passes = p_blur_passes; + ssao_fadeout_from = p_fadeout_from; + ssao_fadeout_to = p_fadeout_to; +} + +bool RendererSceneRenderRD::environment_is_ssao_enabled(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, false); + return env->ssao_enabled; +} + +float RendererSceneRenderRD::environment_get_ssao_ao_affect(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0.0); + return env->ssao_ao_channel_affect; +} + +float RendererSceneRenderRD::environment_get_ssao_light_affect(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, 0.0); + return env->ssao_direct_light_affect; +} + +bool RendererSceneRenderRD::environment_is_ssr_enabled(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, false); + return env->ssr_enabled; +} +bool RendererSceneRenderRD::environment_is_sdfgi_enabled(RID p_env) const { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, false); + return env->sdfgi_enabled; +} + +bool RendererSceneRenderRD::is_environment(RID p_env) const { + return environment_owner.owns(p_env); +} + +Ref<Image> RendererSceneRenderRD::environment_bake_panorama(RID p_env, bool p_bake_irradiance, const Size2i &p_size) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND_V(!env, Ref<Image>()); + + if (env->background == RS::ENV_BG_CAMERA_FEED || env->background == RS::ENV_BG_CANVAS || env->background == RS::ENV_BG_KEEP) { + return Ref<Image>(); //nothing to bake + } + + if (env->background == RS::ENV_BG_CLEAR_COLOR || env->background == RS::ENV_BG_COLOR) { + Color color; + if (env->background == RS::ENV_BG_CLEAR_COLOR) { + color = storage->get_default_clear_color(); + } else { + color = env->bg_color; + } + color.r *= env->bg_energy; + color.g *= env->bg_energy; + color.b *= env->bg_energy; + + Ref<Image> ret; + ret.instance(); + ret->create(p_size.width, p_size.height, false, Image::FORMAT_RGBAF); + for (int i = 0; i < p_size.width; i++) { + for (int j = 0; j < p_size.height; j++) { + ret->set_pixel(i, j, color); + } + } + return ret; + } + + if (env->background == RS::ENV_BG_SKY && env->sky.is_valid()) { + return sky_bake_panorama(env->sky, env->bg_energy, p_bake_irradiance, p_size); + } + + return Ref<Image>(); +} + +//////////////////////////////////////////////////////////// + +RID RendererSceneRenderRD::reflection_atlas_create() { + ReflectionAtlas ra; + ra.count = GLOBAL_GET("rendering/reflections/reflection_atlas/reflection_count"); + ra.size = GLOBAL_GET("rendering/reflections/reflection_atlas/reflection_size"); + + ra.cluster_builder = memnew(ClusterBuilderRD); + ra.cluster_builder->set_shared(&cluster_builder_shared); + ra.cluster_builder->setup(Size2i(ra.size, ra.size), max_cluster_elements, RID(), RID(), RID()); + + return reflection_atlas_owner.make_rid(ra); +} + +void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_reflection_size, int p_reflection_count) { + ReflectionAtlas *ra = reflection_atlas_owner.getornull(p_ref_atlas); + ERR_FAIL_COND(!ra); + + if (ra->size == p_reflection_size && ra->count == p_reflection_count) { + return; //no changes + } + + ra->cluster_builder->setup(Size2i(ra->size, ra->size), max_cluster_elements, RID(), RID(), RID()); + + ra->size = p_reflection_size; + ra->count = p_reflection_count; + + if (ra->reflection.is_valid()) { + //clear and invalidate everything + RD::get_singleton()->free(ra->reflection); + ra->reflection = RID(); + RD::get_singleton()->free(ra->depth_buffer); + ra->depth_buffer = RID(); + for (int i = 0; i < ra->reflections.size(); i++) { + _clear_reflection_data(ra->reflections.write[i].data); + if (ra->reflections[i].owner.is_null()) { + continue; + } + reflection_probe_release_atlas_index(ra->reflections[i].owner); + //rp->atlasindex clear + } + + ra->reflections.clear(); + } +} + +int RendererSceneRenderRD::reflection_atlas_get_size(RID p_ref_atlas) const { + ReflectionAtlas *ra = reflection_atlas_owner.getornull(p_ref_atlas); + ERR_FAIL_COND_V(!ra, 0); + + return ra->size; +} + +//////////////////////// +RID RendererSceneRenderRD::reflection_probe_instance_create(RID p_probe) { + ReflectionProbeInstance rpi; + rpi.probe = p_probe; + return reflection_probe_instance_owner.make_rid(rpi); +} + +void RendererSceneRenderRD::reflection_probe_instance_set_transform(RID p_instance, const Transform &p_transform) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND(!rpi); + + rpi->transform = p_transform; + rpi->dirty = true; +} + +void RendererSceneRenderRD::reflection_probe_release_atlas_index(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND(!rpi); + + if (rpi->atlas.is_null()) { + return; //nothing to release + } + ReflectionAtlas *atlas = reflection_atlas_owner.getornull(rpi->atlas); + ERR_FAIL_COND(!atlas); + ERR_FAIL_INDEX(rpi->atlas_index, atlas->reflections.size()); + atlas->reflections.write[rpi->atlas_index].owner = RID(); + rpi->atlas_index = -1; + rpi->atlas = RID(); +} + +bool RendererSceneRenderRD::reflection_probe_instance_needs_redraw(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, false); + + if (rpi->rendering) { + return false; + } + + if (rpi->dirty) { + return true; + } + + if (storage->reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS) { + return true; + } + + return rpi->atlas_index == -1; +} + +bool RendererSceneRenderRD::reflection_probe_instance_has_reflection(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, false); + + return rpi->atlas.is_valid(); +} + +bool RendererSceneRenderRD::reflection_probe_instance_begin_render(RID p_instance, RID p_reflection_atlas) { + ReflectionAtlas *atlas = reflection_atlas_owner.getornull(p_reflection_atlas); + + ERR_FAIL_COND_V(!atlas, false); + + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, false); + + if (storage->reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS && atlas->reflection.is_valid() && atlas->size != 256) { + WARN_PRINT("ReflectionProbes set to UPDATE_ALWAYS must have an atlas size of 256. Please update the atlas size in the ProjectSettings."); + reflection_atlas_set_size(p_reflection_atlas, 256, atlas->count); + } + + if (storage->reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS && atlas->reflection.is_valid() && atlas->reflections[0].data.layers[0].mipmaps.size() != 8) { + // Invalidate reflection atlas, need to regenerate + RD::get_singleton()->free(atlas->reflection); + atlas->reflection = RID(); + + for (int i = 0; i < atlas->reflections.size(); i++) { + if (atlas->reflections[i].owner.is_null()) { + continue; + } + reflection_probe_release_atlas_index(atlas->reflections[i].owner); + } + + atlas->reflections.clear(); + } + + if (atlas->reflection.is_null()) { + int mipmaps = MIN(roughness_layers, Image::get_image_required_mipmaps(atlas->size, atlas->size, Image::FORMAT_RGBAH) + 1); + mipmaps = storage->reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS ? 8 : mipmaps; // always use 8 mipmaps with real time filtering + { + //reflection atlas was unused, create: + RD::TextureFormat tf; + tf.array_layers = 6 * atlas->count; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.texture_type = RD::TEXTURE_TYPE_CUBE_ARRAY; + tf.mipmaps = mipmaps; + tf.width = atlas->size; + tf.height = atlas->size; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + + atlas->reflection = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + { + RD::TextureFormat tf; + tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; + tf.width = atlas->size; + tf.height = atlas->size; + tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + atlas->depth_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + atlas->reflections.resize(atlas->count); + for (int i = 0; i < atlas->count; i++) { + _update_reflection_data(atlas->reflections.write[i].data, atlas->size, mipmaps, false, atlas->reflection, i * 6, storage->reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS); + for (int j = 0; j < 6; j++) { + Vector<RID> fb; + fb.push_back(atlas->reflections.write[i].data.layers[0].mipmaps[0].views[j]); + fb.push_back(atlas->depth_buffer); + atlas->reflections.write[i].fbs[j] = RD::get_singleton()->framebuffer_create(fb); + } + } + + Vector<RID> fb; + fb.push_back(atlas->depth_buffer); + atlas->depth_fb = RD::get_singleton()->framebuffer_create(fb); + } + + if (rpi->atlas_index == -1) { + for (int i = 0; i < atlas->reflections.size(); i++) { + if (atlas->reflections[i].owner.is_null()) { + rpi->atlas_index = i; + break; + } + } + //find the one used last + if (rpi->atlas_index == -1) { + //everything is in use, find the one least used via LRU + uint64_t pass_min = 0; + + for (int i = 0; i < atlas->reflections.size(); i++) { + ReflectionProbeInstance *rpi2 = reflection_probe_instance_owner.getornull(atlas->reflections[i].owner); + if (rpi2->last_pass < pass_min) { + pass_min = rpi2->last_pass; + rpi->atlas_index = i; + } + } + } + } + + rpi->atlas = p_reflection_atlas; + rpi->rendering = true; + rpi->dirty = false; + rpi->processing_layer = 1; + rpi->processing_side = 0; + + return true; +} + +bool RendererSceneRenderRD::reflection_probe_instance_postprocess_step(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, false); + ERR_FAIL_COND_V(!rpi->rendering, false); + ERR_FAIL_COND_V(rpi->atlas.is_null(), false); + + ReflectionAtlas *atlas = reflection_atlas_owner.getornull(rpi->atlas); + if (!atlas || rpi->atlas_index == -1) { + //does not belong to an atlas anymore, cancel (was removed from atlas or atlas changed while rendering) + rpi->rendering = false; + return false; + } + + if (storage->reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS) { + // Using real time reflections, all roughness is done in one step + _create_reflection_fast_filter(atlas->reflections.write[rpi->atlas_index].data, false); + rpi->rendering = false; + rpi->processing_side = 0; + rpi->processing_layer = 1; + return true; + } + + if (rpi->processing_layer > 1) { + _create_reflection_importance_sample(atlas->reflections.write[rpi->atlas_index].data, false, 10, rpi->processing_layer); + rpi->processing_layer++; + if (rpi->processing_layer == atlas->reflections[rpi->atlas_index].data.layers[0].mipmaps.size()) { + rpi->rendering = false; + rpi->processing_side = 0; + rpi->processing_layer = 1; + return true; + } + return false; + + } else { + _create_reflection_importance_sample(atlas->reflections.write[rpi->atlas_index].data, false, rpi->processing_side, rpi->processing_layer); + } + + rpi->processing_side++; + if (rpi->processing_side == 6) { + rpi->processing_side = 0; + rpi->processing_layer++; + } + + return false; +} + +uint32_t RendererSceneRenderRD::reflection_probe_instance_get_resolution(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, 0); + + ReflectionAtlas *atlas = reflection_atlas_owner.getornull(rpi->atlas); + ERR_FAIL_COND_V(!atlas, 0); + return atlas->size; +} + +RID RendererSceneRenderRD::reflection_probe_instance_get_framebuffer(RID p_instance, int p_index) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, RID()); + ERR_FAIL_INDEX_V(p_index, 6, RID()); + + ReflectionAtlas *atlas = reflection_atlas_owner.getornull(rpi->atlas); + ERR_FAIL_COND_V(!atlas, RID()); + return atlas->reflections[rpi->atlas_index].fbs[p_index]; +} + +RID RendererSceneRenderRD::reflection_probe_instance_get_depth_framebuffer(RID p_instance, int p_index) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, RID()); + ERR_FAIL_INDEX_V(p_index, 6, RID()); + + ReflectionAtlas *atlas = reflection_atlas_owner.getornull(rpi->atlas); + ERR_FAIL_COND_V(!atlas, RID()); + return atlas->depth_fb; +} + +/////////////////////////////////////////////////////////// + +RID RendererSceneRenderRD::shadow_atlas_create() { + return shadow_atlas_owner.make_rid(ShadowAtlas()); +} + +void RendererSceneRenderRD::_update_shadow_atlas(ShadowAtlas *shadow_atlas) { + if (shadow_atlas->size > 0 && shadow_atlas->depth.is_null()) { + RD::TextureFormat tf; + tf.format = shadow_atlas->use_16_bits ? RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_D32_SFLOAT; + tf.width = shadow_atlas->size; + tf.height = shadow_atlas->size; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + shadow_atlas->depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + Vector<RID> fb_tex; + fb_tex.push_back(shadow_atlas->depth); + shadow_atlas->fb = RD::get_singleton()->framebuffer_create(fb_tex); + } +} + +void RendererSceneRenderRD::shadow_atlas_set_size(RID p_atlas, int p_size, bool p_16_bits) { + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND(!shadow_atlas); + ERR_FAIL_COND(p_size < 0); + p_size = next_power_of_2(p_size); + + if (p_size == shadow_atlas->size && p_16_bits == shadow_atlas->use_16_bits) { + return; + } + + // erasing atlas + if (shadow_atlas->depth.is_valid()) { + RD::get_singleton()->free(shadow_atlas->depth); + shadow_atlas->depth = RID(); + } + for (int i = 0; i < 4; i++) { + //clear subdivisions + shadow_atlas->quadrants[i].shadows.resize(0); + shadow_atlas->quadrants[i].shadows.resize(1 << shadow_atlas->quadrants[i].subdivision); + } + + //erase shadow atlas reference from lights + for (Map<RID, uint32_t>::Element *E = shadow_atlas->shadow_owners.front(); E; E = E->next()) { + LightInstance *li = light_instance_owner.getornull(E->key()); + ERR_CONTINUE(!li); + li->shadow_atlases.erase(p_atlas); + } + + //clear owners + shadow_atlas->shadow_owners.clear(); + + shadow_atlas->size = p_size; + shadow_atlas->use_16_bits = p_size; +} + +void RendererSceneRenderRD::shadow_atlas_set_quadrant_subdivision(RID p_atlas, int p_quadrant, int p_subdivision) { + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND(!shadow_atlas); + ERR_FAIL_INDEX(p_quadrant, 4); + ERR_FAIL_INDEX(p_subdivision, 16384); + + uint32_t subdiv = next_power_of_2(p_subdivision); + if (subdiv & 0xaaaaaaaa) { //sqrt(subdiv) must be integer + subdiv <<= 1; + } + + subdiv = int(Math::sqrt((float)subdiv)); + + //obtain the number that will be x*x + + if (shadow_atlas->quadrants[p_quadrant].subdivision == subdiv) { + return; + } + + //erase all data from quadrant + for (int i = 0; i < shadow_atlas->quadrants[p_quadrant].shadows.size(); i++) { + if (shadow_atlas->quadrants[p_quadrant].shadows[i].owner.is_valid()) { + shadow_atlas->shadow_owners.erase(shadow_atlas->quadrants[p_quadrant].shadows[i].owner); + LightInstance *li = light_instance_owner.getornull(shadow_atlas->quadrants[p_quadrant].shadows[i].owner); + ERR_CONTINUE(!li); + li->shadow_atlases.erase(p_atlas); + } + } + + shadow_atlas->quadrants[p_quadrant].shadows.resize(0); + shadow_atlas->quadrants[p_quadrant].shadows.resize(subdiv * subdiv); + shadow_atlas->quadrants[p_quadrant].subdivision = subdiv; + + //cache the smallest subdiv (for faster allocation in light update) + + shadow_atlas->smallest_subdiv = 1 << 30; + + for (int i = 0; i < 4; i++) { + if (shadow_atlas->quadrants[i].subdivision) { + shadow_atlas->smallest_subdiv = MIN(shadow_atlas->smallest_subdiv, shadow_atlas->quadrants[i].subdivision); + } + } + + if (shadow_atlas->smallest_subdiv == 1 << 30) { + shadow_atlas->smallest_subdiv = 0; + } + + //resort the size orders, simple bublesort for 4 elements.. + + int swaps = 0; + do { + swaps = 0; + + for (int i = 0; i < 3; i++) { + if (shadow_atlas->quadrants[shadow_atlas->size_order[i]].subdivision < shadow_atlas->quadrants[shadow_atlas->size_order[i + 1]].subdivision) { + SWAP(shadow_atlas->size_order[i], shadow_atlas->size_order[i + 1]); + swaps++; + } + } + } while (swaps > 0); +} + +bool RendererSceneRenderRD::_shadow_atlas_find_shadow(ShadowAtlas *shadow_atlas, int *p_in_quadrants, int p_quadrant_count, int p_current_subdiv, uint64_t p_tick, int &r_quadrant, int &r_shadow) { + for (int i = p_quadrant_count - 1; i >= 0; i--) { + int qidx = p_in_quadrants[i]; + + if (shadow_atlas->quadrants[qidx].subdivision == (uint32_t)p_current_subdiv) { + return false; + } + + //look for an empty space + int sc = shadow_atlas->quadrants[qidx].shadows.size(); + ShadowAtlas::Quadrant::Shadow *sarr = shadow_atlas->quadrants[qidx].shadows.ptrw(); + + int found_free_idx = -1; //found a free one + int found_used_idx = -1; //found existing one, must steal it + uint64_t min_pass = 0; // pass of the existing one, try to use the least recently used one (LRU fashion) + + for (int j = 0; j < sc; j++) { + if (!sarr[j].owner.is_valid()) { + found_free_idx = j; + break; + } + + LightInstance *sli = light_instance_owner.getornull(sarr[j].owner); + ERR_CONTINUE(!sli); + + if (sli->last_scene_pass != scene_pass) { + //was just allocated, don't kill it so soon, wait a bit.. + if (p_tick - sarr[j].alloc_tick < shadow_atlas_realloc_tolerance_msec) { + continue; + } + + if (found_used_idx == -1 || sli->last_scene_pass < min_pass) { + found_used_idx = j; + min_pass = sli->last_scene_pass; + } + } + } + + if (found_free_idx == -1 && found_used_idx == -1) { + continue; //nothing found + } + + if (found_free_idx == -1 && found_used_idx != -1) { + found_free_idx = found_used_idx; + } + + r_quadrant = qidx; + r_shadow = found_free_idx; + + return true; + } + + return false; +} + +bool RendererSceneRenderRD::shadow_atlas_update_light(RID p_atlas, RID p_light_intance, float p_coverage, uint64_t p_light_version) { + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND_V(!shadow_atlas, false); + + LightInstance *li = light_instance_owner.getornull(p_light_intance); + ERR_FAIL_COND_V(!li, false); + + if (shadow_atlas->size == 0 || shadow_atlas->smallest_subdiv == 0) { + return false; + } + + uint32_t quad_size = shadow_atlas->size >> 1; + int desired_fit = MIN(quad_size / shadow_atlas->smallest_subdiv, next_power_of_2(quad_size * p_coverage)); + + int valid_quadrants[4]; + int valid_quadrant_count = 0; + int best_size = -1; //best size found + int best_subdiv = -1; //subdiv for the best size + + //find the quadrants this fits into, and the best possible size it can fit into + for (int i = 0; i < 4; i++) { + int q = shadow_atlas->size_order[i]; + int sd = shadow_atlas->quadrants[q].subdivision; + if (sd == 0) { + continue; //unused + } + + int max_fit = quad_size / sd; + + if (best_size != -1 && max_fit > best_size) { + break; //too large + } + + valid_quadrants[valid_quadrant_count++] = q; + best_subdiv = sd; + + if (max_fit >= desired_fit) { + best_size = max_fit; + } + } + + ERR_FAIL_COND_V(valid_quadrant_count == 0, false); + + uint64_t tick = OS::get_singleton()->get_ticks_msec(); + + //see if it already exists + + if (shadow_atlas->shadow_owners.has(p_light_intance)) { + //it does! + uint32_t key = shadow_atlas->shadow_owners[p_light_intance]; + uint32_t q = (key >> ShadowAtlas::QUADRANT_SHIFT) & 0x3; + uint32_t s = key & ShadowAtlas::SHADOW_INDEX_MASK; + + bool should_realloc = shadow_atlas->quadrants[q].subdivision != (uint32_t)best_subdiv && (shadow_atlas->quadrants[q].shadows[s].alloc_tick - tick > shadow_atlas_realloc_tolerance_msec); + bool should_redraw = shadow_atlas->quadrants[q].shadows[s].version != p_light_version; + + if (!should_realloc) { + shadow_atlas->quadrants[q].shadows.write[s].version = p_light_version; + //already existing, see if it should redraw or it's just OK + return should_redraw; + } + + int new_quadrant, new_shadow; + + //find a better place + if (_shadow_atlas_find_shadow(shadow_atlas, valid_quadrants, valid_quadrant_count, shadow_atlas->quadrants[q].subdivision, tick, new_quadrant, new_shadow)) { + //found a better place! + ShadowAtlas::Quadrant::Shadow *sh = &shadow_atlas->quadrants[new_quadrant].shadows.write[new_shadow]; + if (sh->owner.is_valid()) { + //is taken, but is invalid, erasing it + shadow_atlas->shadow_owners.erase(sh->owner); + LightInstance *sli = light_instance_owner.getornull(sh->owner); + sli->shadow_atlases.erase(p_atlas); + } + + //erase previous + shadow_atlas->quadrants[q].shadows.write[s].version = 0; + shadow_atlas->quadrants[q].shadows.write[s].owner = RID(); + + sh->owner = p_light_intance; + sh->alloc_tick = tick; + sh->version = p_light_version; + li->shadow_atlases.insert(p_atlas); + + //make new key + key = new_quadrant << ShadowAtlas::QUADRANT_SHIFT; + key |= new_shadow; + //update it in map + shadow_atlas->shadow_owners[p_light_intance] = key; + //make it dirty, as it should redraw anyway + return true; + } + + //no better place for this shadow found, keep current + + //already existing, see if it should redraw or it's just OK + + shadow_atlas->quadrants[q].shadows.write[s].version = p_light_version; + + return should_redraw; + } + + int new_quadrant, new_shadow; + + //find a better place + if (_shadow_atlas_find_shadow(shadow_atlas, valid_quadrants, valid_quadrant_count, -1, tick, new_quadrant, new_shadow)) { + //found a better place! + ShadowAtlas::Quadrant::Shadow *sh = &shadow_atlas->quadrants[new_quadrant].shadows.write[new_shadow]; + if (sh->owner.is_valid()) { + //is taken, but is invalid, erasing it + shadow_atlas->shadow_owners.erase(sh->owner); + LightInstance *sli = light_instance_owner.getornull(sh->owner); + sli->shadow_atlases.erase(p_atlas); + } + + sh->owner = p_light_intance; + sh->alloc_tick = tick; + sh->version = p_light_version; + li->shadow_atlases.insert(p_atlas); + + //make new key + uint32_t key = new_quadrant << ShadowAtlas::QUADRANT_SHIFT; + key |= new_shadow; + //update it in map + shadow_atlas->shadow_owners[p_light_intance] = key; + //make it dirty, as it should redraw anyway + + return true; + } + + //no place to allocate this light, apologies + + return false; +} + +void RendererSceneRenderRD::_update_directional_shadow_atlas() { + if (directional_shadow.depth.is_null() && directional_shadow.size > 0) { + RD::TextureFormat tf; + tf.format = directional_shadow.use_16_bits ? RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_D32_SFLOAT; + tf.width = directional_shadow.size; + tf.height = directional_shadow.size; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + directional_shadow.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + Vector<RID> fb_tex; + fb_tex.push_back(directional_shadow.depth); + directional_shadow.fb = RD::get_singleton()->framebuffer_create(fb_tex); + } +} +void RendererSceneRenderRD::directional_shadow_atlas_set_size(int p_size, bool p_16_bits) { + p_size = nearest_power_of_2_templated(p_size); + + if (directional_shadow.size == p_size && directional_shadow.use_16_bits == p_16_bits) { + return; + } + + directional_shadow.size = p_size; + + if (directional_shadow.depth.is_valid()) { + RD::get_singleton()->free(directional_shadow.depth); + directional_shadow.depth = RID(); + _base_uniforms_changed(); + } +} + +void RendererSceneRenderRD::set_directional_shadow_count(int p_count) { + directional_shadow.light_count = p_count; + directional_shadow.current_light = 0; +} + +static Rect2i _get_directional_shadow_rect(int p_size, int p_shadow_count, int p_shadow_index) { + int split_h = 1; + int split_v = 1; + + while (split_h * split_v < p_shadow_count) { + if (split_h == split_v) { + split_h <<= 1; + } else { + split_v <<= 1; + } + } + + Rect2i rect(0, 0, p_size, p_size); + rect.size.width /= split_h; + rect.size.height /= split_v; + + rect.position.x = rect.size.width * (p_shadow_index % split_h); + rect.position.y = rect.size.height * (p_shadow_index / split_h); + + return rect; +} + +int RendererSceneRenderRD::get_directional_light_shadow_size(RID p_light_intance) { + ERR_FAIL_COND_V(directional_shadow.light_count == 0, 0); + + Rect2i r = _get_directional_shadow_rect(directional_shadow.size, directional_shadow.light_count, 0); + + LightInstance *light_instance = light_instance_owner.getornull(p_light_intance); + ERR_FAIL_COND_V(!light_instance, 0); + + switch (storage->light_directional_get_shadow_mode(light_instance->light)) { + case RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL: + break; //none + case RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS: + r.size.height /= 2; + break; + case RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_4_SPLITS: + r.size /= 2; + break; + } + + return MAX(r.size.width, r.size.height); +} + +////////////////////////////////////////////////// + +RID RendererSceneRenderRD::camera_effects_allocate() { + return camera_effects_owner.allocate_rid(); +} +void RendererSceneRenderRD::camera_effects_initialize(RID p_rid) { + camera_effects_owner.initialize_rid(p_rid, CameraEffects()); +} + +void RendererSceneRenderRD::camera_effects_set_dof_blur_quality(RS::DOFBlurQuality p_quality, bool p_use_jitter) { + dof_blur_quality = p_quality; + dof_blur_use_jitter = p_use_jitter; +} + +void RendererSceneRenderRD::camera_effects_set_dof_blur_bokeh_shape(RS::DOFBokehShape p_shape) { + dof_blur_bokeh_shape = p_shape; +} + +void RendererSceneRenderRD::camera_effects_set_dof_blur(RID p_camera_effects, bool p_far_enable, float p_far_distance, float p_far_transition, bool p_near_enable, float p_near_distance, float p_near_transition, float p_amount) { + CameraEffects *camfx = camera_effects_owner.getornull(p_camera_effects); + ERR_FAIL_COND(!camfx); + + camfx->dof_blur_far_enabled = p_far_enable; + camfx->dof_blur_far_distance = p_far_distance; + camfx->dof_blur_far_transition = p_far_transition; + + camfx->dof_blur_near_enabled = p_near_enable; + camfx->dof_blur_near_distance = p_near_distance; + camfx->dof_blur_near_transition = p_near_transition; + + camfx->dof_blur_amount = p_amount; +} + +void RendererSceneRenderRD::camera_effects_set_custom_exposure(RID p_camera_effects, bool p_enable, float p_exposure) { + CameraEffects *camfx = camera_effects_owner.getornull(p_camera_effects); + ERR_FAIL_COND(!camfx); + + camfx->override_exposure_enabled = p_enable; + camfx->override_exposure = p_exposure; +} + +RID RendererSceneRenderRD::light_instance_create(RID p_light) { + RID li = light_instance_owner.make_rid(LightInstance()); + + LightInstance *light_instance = light_instance_owner.getornull(li); + + light_instance->self = li; + light_instance->light = p_light; + light_instance->light_type = storage->light_get_type(p_light); + + return li; +} + +void RendererSceneRenderRD::light_instance_set_transform(RID p_light_instance, const Transform &p_transform) { + LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); + ERR_FAIL_COND(!light_instance); + + light_instance->transform = p_transform; +} + +void RendererSceneRenderRD::light_instance_set_aabb(RID p_light_instance, const AABB &p_aabb) { + LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); + ERR_FAIL_COND(!light_instance); + + light_instance->aabb = p_aabb; +} + +void RendererSceneRenderRD::light_instance_set_shadow_transform(RID p_light_instance, const CameraMatrix &p_projection, const Transform &p_transform, float p_far, float p_split, int p_pass, float p_shadow_texel_size, float p_bias_scale, float p_range_begin, const Vector2 &p_uv_scale) { + LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); + ERR_FAIL_COND(!light_instance); + + ERR_FAIL_INDEX(p_pass, 6); + + light_instance->shadow_transform[p_pass].camera = p_projection; + light_instance->shadow_transform[p_pass].transform = p_transform; + light_instance->shadow_transform[p_pass].farplane = p_far; + light_instance->shadow_transform[p_pass].split = p_split; + light_instance->shadow_transform[p_pass].bias_scale = p_bias_scale; + light_instance->shadow_transform[p_pass].range_begin = p_range_begin; + light_instance->shadow_transform[p_pass].shadow_texel_size = p_shadow_texel_size; + light_instance->shadow_transform[p_pass].uv_scale = p_uv_scale; +} + +void RendererSceneRenderRD::light_instance_mark_visible(RID p_light_instance) { + LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); + ERR_FAIL_COND(!light_instance); + + light_instance->last_scene_pass = scene_pass; +} + +RendererSceneRenderRD::ShadowCubemap *RendererSceneRenderRD::_get_shadow_cubemap(int p_size) { + if (!shadow_cubemaps.has(p_size)) { + ShadowCubemap sc; + { + RD::TextureFormat tf; + tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; + tf.width = p_size; + tf.height = p_size; + tf.texture_type = RD::TEXTURE_TYPE_CUBE; + tf.array_layers = 6; + tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + sc.cubemap = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + + for (int i = 0; i < 6; i++) { + RID side_texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), sc.cubemap, i, 0); + Vector<RID> fbtex; + fbtex.push_back(side_texture); + sc.side_fb[i] = RD::get_singleton()->framebuffer_create(fbtex); + } + + shadow_cubemaps[p_size] = sc; + } + + return &shadow_cubemaps[p_size]; +} + +////////////////////////// + +RID RendererSceneRenderRD::decal_instance_create(RID p_decal) { + DecalInstance di; + di.decal = p_decal; + return decal_instance_owner.make_rid(di); +} + +void RendererSceneRenderRD::decal_instance_set_transform(RID p_decal, const Transform &p_transform) { + DecalInstance *di = decal_instance_owner.getornull(p_decal); + ERR_FAIL_COND(!di); + di->transform = p_transform; +} + +///////////////////////////////// + +RID RendererSceneRenderRD::lightmap_instance_create(RID p_lightmap) { + LightmapInstance li; + li.lightmap = p_lightmap; + return lightmap_instance_owner.make_rid(li); +} +void RendererSceneRenderRD::lightmap_instance_set_transform(RID p_lightmap, const Transform &p_transform) { + LightmapInstance *li = lightmap_instance_owner.getornull(p_lightmap); + ERR_FAIL_COND(!li); + li->transform = p_transform; +} + +///////////////////////////////// + +RID RendererSceneRenderRD::gi_probe_instance_create(RID p_base) { + GIProbeInstance gi_probe; + gi_probe.probe = p_base; + RID rid = gi_probe_instance_owner.make_rid(gi_probe); + return rid; +} + +void RendererSceneRenderRD::gi_probe_instance_set_transform_to_data(RID p_probe, const Transform &p_xform) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->transform = p_xform; +} + +bool RendererSceneRenderRD::gi_probe_needs_update(RID p_probe) const { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); + ERR_FAIL_COND_V(!gi_probe, false); + + if (low_end) { + return false; + } + + //return true; + return gi_probe->last_probe_version != storage->gi_probe_get_version(gi_probe->probe); +} + +void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<GeometryInstance *> &p_dynamic_objects) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); + ERR_FAIL_COND(!gi_probe); + + if (low_end) { + return; + } + + uint32_t data_version = storage->gi_probe_get_data_version(gi_probe->probe); + + // (RE)CREATE IF NEEDED + + if (gi_probe->last_probe_data_version != data_version) { + //need to re-create everything + if (gi_probe->texture.is_valid()) { + RD::get_singleton()->free(gi_probe->texture); + RD::get_singleton()->free(gi_probe->write_buffer); + gi_probe->mipmaps.clear(); + } + + for (int i = 0; i < gi_probe->dynamic_maps.size(); i++) { + RD::get_singleton()->free(gi_probe->dynamic_maps[i].texture); + RD::get_singleton()->free(gi_probe->dynamic_maps[i].depth); + } + + gi_probe->dynamic_maps.clear(); + + Vector3i octree_size = storage->gi_probe_get_octree_size(gi_probe->probe); + + if (octree_size != Vector3i()) { + //can create a 3D texture + Vector<int> levels = storage->gi_probe_get_level_counts(gi_probe->probe); + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tf.width = octree_size.x; + tf.height = octree_size.y; + tf.depth = octree_size.z; + tf.texture_type = RD::TEXTURE_TYPE_3D; + tf.mipmaps = levels.size(); + + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + + gi_probe->texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, levels.size(), 0, 1); + + { + int total_elements = 0; + for (int i = 0; i < levels.size(); i++) { + total_elements += levels[i]; + } + + gi_probe->write_buffer = RD::get_singleton()->storage_buffer_create(total_elements * 16); + } + + for (int i = 0; i < levels.size(); i++) { + GIProbeInstance::Mipmap mipmap; + mipmap.texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), gi_probe->texture, 0, i, RD::TEXTURE_SLICE_3D); + mipmap.level = levels.size() - i - 1; + mipmap.cell_offset = 0; + for (uint32_t j = 0; j < mipmap.level; j++) { + mipmap.cell_offset += levels[j]; + } + mipmap.cell_count = levels[mipmap.level]; + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(storage->gi_probe_get_octree_buffer(gi_probe->probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(storage->gi_probe_get_data_buffer(gi_probe->probe)); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 4; + u.ids.push_back(gi_probe->write_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 9; + u.ids.push_back(storage->gi_probe_get_sdf_texture(gi_probe->probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 10; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + { + Vector<RD::Uniform> copy_uniforms = uniforms; + if (i == 0) { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 3; + u.ids.push_back(gi_probe_lights_uniform); + copy_uniforms.push_back(u); + } + + mipmap.uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_COMPUTE_LIGHT], 0); + + copy_uniforms = uniforms; //restore + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 5; + u.ids.push_back(gi_probe->texture); + copy_uniforms.push_back(u); + } + mipmap.second_bounce_uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_COMPUTE_SECOND_BOUNCE], 0); + } else { + mipmap.uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_COMPUTE_MIPMAP], 0); + } + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 5; + u.ids.push_back(mipmap.texture); + uniforms.push_back(u); + } + + mipmap.write_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_WRITE_TEXTURE], 0); + + gi_probe->mipmaps.push_back(mipmap); + } + + { + uint32_t dynamic_map_size = MAX(MAX(octree_size.x, octree_size.y), octree_size.z); + uint32_t oversample = nearest_power_of_2_templated(4); + int mipmap_index = 0; + + while (mipmap_index < gi_probe->mipmaps.size()) { + GIProbeInstance::DynamicMap dmap; + + if (oversample > 0) { + dmap.size = dynamic_map_size * (1 << oversample); + dmap.mipmap = -1; + oversample--; + } else { + dmap.size = dynamic_map_size >> mipmap_index; + dmap.mipmap = mipmap_index; + mipmap_index++; + } + + RD::TextureFormat dtf; + dtf.width = dmap.size; + dtf.height = dmap.size; + dtf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; + + if (gi_probe->dynamic_maps.size() == 0) { + dtf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + dmap.texture = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + + if (gi_probe->dynamic_maps.size() == 0) { + //render depth for first one + dtf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; + dtf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + dmap.fb_depth = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + } + + //just use depth as-is + dtf.format = RD::DATA_FORMAT_R32_SFLOAT; + dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + + dmap.depth = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + + if (gi_probe->dynamic_maps.size() == 0) { + dtf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + dmap.albedo = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + dmap.normal = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + dmap.orm = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + + Vector<RID> fb; + fb.push_back(dmap.albedo); + fb.push_back(dmap.normal); + fb.push_back(dmap.orm); + fb.push_back(dmap.texture); //emission + fb.push_back(dmap.depth); + fb.push_back(dmap.fb_depth); + + dmap.fb = RD::get_singleton()->framebuffer_create(fb); + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 3; + u.ids.push_back(gi_probe_lights_uniform); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 5; + u.ids.push_back(dmap.albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 6; + u.ids.push_back(dmap.normal); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 7; + u.ids.push_back(dmap.orm); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 8; + u.ids.push_back(dmap.fb_depth); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 9; + u.ids.push_back(storage->gi_probe_get_sdf_texture(gi_probe->probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 10; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 11; + u.ids.push_back(dmap.texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 12; + u.ids.push_back(dmap.depth); + uniforms.push_back(u); + } + + dmap.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING], 0); + } + } else { + bool plot = dmap.mipmap >= 0; + bool write = dmap.mipmap < (gi_probe->mipmaps.size() - 1); + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 5; + u.ids.push_back(gi_probe->dynamic_maps[gi_probe->dynamic_maps.size() - 1].texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 6; + u.ids.push_back(gi_probe->dynamic_maps[gi_probe->dynamic_maps.size() - 1].depth); + uniforms.push_back(u); + } + + if (write) { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 7; + u.ids.push_back(dmap.texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 8; + u.ids.push_back(dmap.depth); + uniforms.push_back(u); + } + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 9; + u.ids.push_back(storage->gi_probe_get_sdf_texture(gi_probe->probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 10; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + if (plot) { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 11; + u.ids.push_back(gi_probe->mipmaps[dmap.mipmap].texture); + uniforms.push_back(u); + } + } + + dmap.uniform_set = RD::get_singleton()->uniform_set_create( + uniforms, + giprobe_lighting_shader_version_shaders[(write && plot) ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT : (write ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE : GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_PLOT)], + 0); + } + + gi_probe->dynamic_maps.push_back(dmap); + } + } + } + + gi_probe->last_probe_data_version = data_version; + p_update_light_instances = true; //just in case + + _base_uniforms_changed(); + } + + // UDPDATE TIME + + if (gi_probe->has_dynamic_object_data) { + //if it has dynamic object data, it needs to be cleared + RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1); + } + + uint32_t light_count = 0; + + if (p_update_light_instances || p_dynamic_objects.size() > 0) { + light_count = MIN(gi_probe_max_lights, (uint32_t)p_light_instances.size()); + + { + Transform to_cell = storage->gi_probe_get_to_cell_xform(gi_probe->probe); + Transform to_probe_xform = (gi_probe->transform * to_cell.affine_inverse()).affine_inverse(); + //update lights + + for (uint32_t i = 0; i < light_count; i++) { + GIProbeLight &l = gi_probe_lights[i]; + RID light_instance = p_light_instances[i]; + RID light = light_instance_get_base_light(light_instance); + + l.type = storage->light_get_type(light); + if (l.type == RS::LIGHT_DIRECTIONAL && storage->light_directional_is_sky_only(light)) { + light_count--; + continue; + } + + l.attenuation = storage->light_get_param(light, RS::LIGHT_PARAM_ATTENUATION); + l.energy = storage->light_get_param(light, RS::LIGHT_PARAM_ENERGY) * storage->light_get_param(light, RS::LIGHT_PARAM_INDIRECT_ENERGY); + l.radius = to_cell.basis.xform(Vector3(storage->light_get_param(light, RS::LIGHT_PARAM_RANGE), 0, 0)).length(); + Color color = storage->light_get_color(light).to_linear(); + l.color[0] = color.r; + l.color[1] = color.g; + l.color[2] = color.b; + + l.cos_spot_angle = Math::cos(Math::deg2rad(storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ANGLE))); + l.inv_spot_attenuation = 1.0f / storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + Transform xform = light_instance_get_base_transform(light_instance); + + Vector3 pos = to_probe_xform.xform(xform.origin); + Vector3 dir = to_probe_xform.basis.xform(-xform.basis.get_axis(2)).normalized(); + + l.position[0] = pos.x; + l.position[1] = pos.y; + l.position[2] = pos.z; + + l.direction[0] = dir.x; + l.direction[1] = dir.y; + l.direction[2] = dir.z; + + l.has_shadow = storage->light_has_shadow(light); + } + + RD::get_singleton()->buffer_update(gi_probe_lights_uniform, 0, sizeof(GIProbeLight) * light_count, gi_probe_lights); + } + } + + if (gi_probe->has_dynamic_object_data || p_update_light_instances || p_dynamic_objects.size()) { + // PROCESS MIPMAPS + if (gi_probe->mipmaps.size()) { + //can update mipmaps + + Vector3i probe_size = storage->gi_probe_get_octree_size(gi_probe->probe); + + GIProbePushConstant push_constant; + + push_constant.limits[0] = probe_size.x; + push_constant.limits[1] = probe_size.y; + push_constant.limits[2] = probe_size.z; + push_constant.stack_size = gi_probe->mipmaps.size(); + push_constant.emission_scale = 1.0; + push_constant.propagation = storage->gi_probe_get_propagation(gi_probe->probe); + push_constant.dynamic_range = storage->gi_probe_get_dynamic_range(gi_probe->probe); + push_constant.light_count = light_count; + push_constant.aniso_strength = 0; + + /* print_line("probe update to version " + itos(gi_probe->last_probe_version)); + print_line("propagation " + rtos(push_constant.propagation)); + print_line("dynrange " + rtos(push_constant.dynamic_range)); + */ + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + int passes; + if (p_update_light_instances) { + passes = storage->gi_probe_is_using_two_bounces(gi_probe->probe) ? 2 : 1; + } else { + passes = 1; //only re-blitting is necessary + } + int wg_size = 64; + int wg_limit_x = RD::get_singleton()->limit_get(RD::LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X); + + for (int pass = 0; pass < passes; pass++) { + if (p_update_light_instances) { + for (int i = 0; i < gi_probe->mipmaps.size(); i++) { + if (i == 0) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_lighting_shader_version_pipelines[pass == 0 ? GI_PROBE_SHADER_VERSION_COMPUTE_LIGHT : GI_PROBE_SHADER_VERSION_COMPUTE_SECOND_BOUNCE]); + } else if (i == 1) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_lighting_shader_version_pipelines[GI_PROBE_SHADER_VERSION_COMPUTE_MIPMAP]); + } + + if (pass == 1 || i > 0) { + RD::get_singleton()->compute_list_add_barrier(compute_list); //wait til previous step is done + } + if (pass == 0 || i > 0) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi_probe->mipmaps[i].uniform_set, 0); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi_probe->mipmaps[i].second_bounce_uniform_set, 0); + } + + push_constant.cell_offset = gi_probe->mipmaps[i].cell_offset; + push_constant.cell_count = gi_probe->mipmaps[i].cell_count; + + int wg_todo = (gi_probe->mipmaps[i].cell_count - 1) / wg_size + 1; + while (wg_todo) { + int wg_count = MIN(wg_todo, wg_limit_x); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GIProbePushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, wg_count, 1, 1); + wg_todo -= wg_count; + push_constant.cell_offset += wg_count * wg_size; + } + } + + RD::get_singleton()->compute_list_add_barrier(compute_list); //wait til previous step is done + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_lighting_shader_version_pipelines[GI_PROBE_SHADER_VERSION_WRITE_TEXTURE]); + + for (int i = 0; i < gi_probe->mipmaps.size(); i++) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi_probe->mipmaps[i].write_uniform_set, 0); + + push_constant.cell_offset = gi_probe->mipmaps[i].cell_offset; + push_constant.cell_count = gi_probe->mipmaps[i].cell_count; + + int wg_todo = (gi_probe->mipmaps[i].cell_count - 1) / wg_size + 1; + while (wg_todo) { + int wg_count = MIN(wg_todo, wg_limit_x); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GIProbePushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, wg_count, 1, 1); + wg_todo -= wg_count; + push_constant.cell_offset += wg_count * wg_size; + } + } + } + + RD::get_singleton()->compute_list_end(); + } + } + + gi_probe->has_dynamic_object_data = false; //clear until dynamic object data is used again + + if (p_dynamic_objects.size() && gi_probe->dynamic_maps.size()) { + Vector3i octree_size = storage->gi_probe_get_octree_size(gi_probe->probe); + int multiplier = gi_probe->dynamic_maps[0].size / MAX(MAX(octree_size.x, octree_size.y), octree_size.z); + + Transform oversample_scale; + oversample_scale.basis.scale(Vector3(multiplier, multiplier, multiplier)); + + Transform to_cell = oversample_scale * storage->gi_probe_get_to_cell_xform(gi_probe->probe); + Transform to_world_xform = gi_probe->transform * to_cell.affine_inverse(); + Transform to_probe_xform = to_world_xform.affine_inverse(); + + AABB probe_aabb(Vector3(), octree_size); + + //this could probably be better parallelized in compute.. + for (int i = 0; i < (int)p_dynamic_objects.size(); i++) { + GeometryInstance *instance = p_dynamic_objects[i]; + + //transform aabb to giprobe + AABB aabb = (to_probe_xform * geometry_instance_get_transform(instance)).xform(geometry_instance_get_aabb(instance)); + + //this needs to wrap to grid resolution to avoid jitter + //also extend margin a bit just in case + Vector3i begin = aabb.position - Vector3i(1, 1, 1); + Vector3i end = aabb.position + aabb.size + Vector3i(1, 1, 1); + + for (int j = 0; j < 3; j++) { + if ((end[j] - begin[j]) & 1) { + end[j]++; //for half extents split, it needs to be even + } + begin[j] = MAX(begin[j], 0); + end[j] = MIN(end[j], octree_size[j] * multiplier); + } + + //aabb = aabb.intersection(probe_aabb); //intersect + aabb.position = begin; + aabb.size = end - begin; + + //print_line("aabb: " + aabb); + + for (int j = 0; j < 6; j++) { + //if (j != 0 && j != 3) { + // continue; + //} + static const Vector3 render_z[6] = { + Vector3(1, 0, 0), + Vector3(0, 1, 0), + Vector3(0, 0, 1), + Vector3(-1, 0, 0), + Vector3(0, -1, 0), + Vector3(0, 0, -1), + }; + static const Vector3 render_up[6] = { + Vector3(0, 1, 0), + Vector3(0, 0, 1), + Vector3(0, 1, 0), + Vector3(0, 1, 0), + Vector3(0, 0, 1), + Vector3(0, 1, 0), + }; + + Vector3 render_dir = render_z[j]; + Vector3 up_dir = render_up[j]; + + Vector3 center = aabb.position + aabb.size * 0.5; + Transform xform; + xform.set_look_at(center - aabb.size * 0.5 * render_dir, center, up_dir); + + Vector3 x_dir = xform.basis.get_axis(0).abs(); + int x_axis = int(Vector3(0, 1, 2).dot(x_dir)); + Vector3 y_dir = xform.basis.get_axis(1).abs(); + int y_axis = int(Vector3(0, 1, 2).dot(y_dir)); + Vector3 z_dir = -xform.basis.get_axis(2); + int z_axis = int(Vector3(0, 1, 2).dot(z_dir.abs())); + + Rect2i rect(aabb.position[x_axis], aabb.position[y_axis], aabb.size[x_axis], aabb.size[y_axis]); + bool x_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_axis(0)) < 0); + bool y_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_axis(1)) < 0); + bool z_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_axis(2)) > 0); + + CameraMatrix cm; + cm.set_orthogonal(-rect.size.width / 2, rect.size.width / 2, -rect.size.height / 2, rect.size.height / 2, 0.0001, aabb.size[z_axis]); + + if (cull_argument.size() == 0) { + cull_argument.push_back(nullptr); + } + cull_argument[0] = instance; + + _render_material(to_world_xform * xform, cm, true, cull_argument, gi_probe->dynamic_maps[0].fb, Rect2i(Vector2i(), rect.size)); + + GIProbeDynamicPushConstant push_constant; + zeromem(&push_constant, sizeof(GIProbeDynamicPushConstant)); + push_constant.limits[0] = octree_size.x; + push_constant.limits[1] = octree_size.y; + push_constant.limits[2] = octree_size.z; + push_constant.light_count = p_light_instances.size(); + push_constant.x_dir[0] = x_dir[0]; + push_constant.x_dir[1] = x_dir[1]; + push_constant.x_dir[2] = x_dir[2]; + push_constant.y_dir[0] = y_dir[0]; + push_constant.y_dir[1] = y_dir[1]; + push_constant.y_dir[2] = y_dir[2]; + push_constant.z_dir[0] = z_dir[0]; + push_constant.z_dir[1] = z_dir[1]; + push_constant.z_dir[2] = z_dir[2]; + push_constant.z_base = xform.origin[z_axis]; + push_constant.z_sign = (z_flip ? -1.0 : 1.0); + push_constant.pos_multiplier = float(1.0) / multiplier; + push_constant.dynamic_range = storage->gi_probe_get_dynamic_range(gi_probe->probe); + push_constant.flip_x = x_flip; + push_constant.flip_y = y_flip; + push_constant.rect_pos[0] = rect.position[0]; + push_constant.rect_pos[1] = rect.position[1]; + push_constant.rect_size[0] = rect.size[0]; + push_constant.rect_size[1] = rect.size[1]; + push_constant.prev_rect_ofs[0] = 0; + push_constant.prev_rect_ofs[1] = 0; + push_constant.prev_rect_size[0] = 0; + push_constant.prev_rect_size[1] = 0; + push_constant.on_mipmap = false; + push_constant.propagation = storage->gi_probe_get_propagation(gi_probe->probe); + push_constant.pad[0] = 0; + push_constant.pad[1] = 0; + push_constant.pad[2] = 0; + + //process lighting + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_lighting_shader_version_pipelines[GI_PROBE_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi_probe->dynamic_maps[0].uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GIProbeDynamicPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, (rect.size.x - 1) / 8 + 1, (rect.size.y - 1) / 8 + 1, 1); + //print_line("rect: " + itos(i) + ": " + rect); + + for (int k = 1; k < gi_probe->dynamic_maps.size(); k++) { + // enlarge the rect if needed so all pixels fit when downscaled, + // this ensures downsampling is smooth and optimal because no pixels are left behind + + //x + if (rect.position.x & 1) { + rect.size.x++; + push_constant.prev_rect_ofs[0] = 1; //this is used to ensure reading is also optimal + } else { + push_constant.prev_rect_ofs[0] = 0; + } + if (rect.size.x & 1) { + rect.size.x++; + } + + rect.position.x >>= 1; + rect.size.x = MAX(1, rect.size.x >> 1); + + //y + if (rect.position.y & 1) { + rect.size.y++; + push_constant.prev_rect_ofs[1] = 1; + } else { + push_constant.prev_rect_ofs[1] = 0; + } + if (rect.size.y & 1) { + rect.size.y++; + } + + rect.position.y >>= 1; + rect.size.y = MAX(1, rect.size.y >> 1); + + //shrink limits to ensure plot does not go outside map + if (gi_probe->dynamic_maps[k].mipmap > 0) { + for (int l = 0; l < 3; l++) { + push_constant.limits[l] = MAX(1, push_constant.limits[l] >> 1); + } + } + + //print_line("rect: " + itos(i) + ": " + rect); + push_constant.rect_pos[0] = rect.position[0]; + push_constant.rect_pos[1] = rect.position[1]; + push_constant.prev_rect_size[0] = push_constant.rect_size[0]; + push_constant.prev_rect_size[1] = push_constant.rect_size[1]; + push_constant.rect_size[0] = rect.size[0]; + push_constant.rect_size[1] = rect.size[1]; + push_constant.on_mipmap = gi_probe->dynamic_maps[k].mipmap > 0; + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + if (gi_probe->dynamic_maps[k].mipmap < 0) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_lighting_shader_version_pipelines[GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE]); + } else if (k < gi_probe->dynamic_maps.size() - 1) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_lighting_shader_version_pipelines[GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT]); + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_lighting_shader_version_pipelines[GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_PLOT]); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi_probe->dynamic_maps[k].uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GIProbeDynamicPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, (rect.size.x - 1) / 8 + 1, (rect.size.y - 1) / 8 + 1, 1); + } + + RD::get_singleton()->compute_list_end(); + } + } + + gi_probe->has_dynamic_object_data = true; //clear until dynamic object data is used again + } + + gi_probe->last_probe_version = storage->gi_probe_get_version(gi_probe->probe); +} + +void RendererSceneRenderRD::_debug_giprobe(RID p_gi_probe, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + if (gi_probe->mipmaps.size() == 0) { + return; + } + + CameraMatrix transform = (p_camera_with_transform * CameraMatrix(gi_probe->transform)) * CameraMatrix(storage->gi_probe_get_to_cell_xform(gi_probe->probe).affine_inverse()); + + int level = 0; + Vector3i octree_size = storage->gi_probe_get_octree_size(gi_probe->probe); + + GIProbeDebugPushConstant push_constant; + push_constant.alpha = p_alpha; + push_constant.dynamic_range = storage->gi_probe_get_dynamic_range(gi_probe->probe); + push_constant.cell_offset = gi_probe->mipmaps[level].cell_offset; + push_constant.level = level; + + push_constant.bounds[0] = octree_size.x >> level; + push_constant.bounds[1] = octree_size.y >> level; + push_constant.bounds[2] = octree_size.z >> level; + push_constant.pad = 0; + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + push_constant.projection[i * 4 + j] = transform.matrix[i][j]; + } + } + + if (giprobe_debug_uniform_set.is_valid()) { + RD::get_singleton()->free(giprobe_debug_uniform_set); + } + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(storage->gi_probe_get_data_buffer(gi_probe->probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 2; + u.ids.push_back(gi_probe->texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 3; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + int cell_count; + if (!p_emission && p_lighting && gi_probe->has_dynamic_object_data) { + cell_count = push_constant.bounds[0] * push_constant.bounds[1] * push_constant.bounds[2]; + } else { + cell_count = gi_probe->mipmaps[level].cell_count; + } + + giprobe_debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_debug_shader_version_shaders[0], 0); + + int giprobe_debug_pipeline = GI_PROBE_DEBUG_COLOR; + if (p_emission) { + giprobe_debug_pipeline = GI_PROBE_DEBUG_EMISSION; + } else if (p_lighting) { + giprobe_debug_pipeline = gi_probe->has_dynamic_object_data ? GI_PROBE_DEBUG_LIGHT_FULL : GI_PROBE_DEBUG_LIGHT; + } + RD::get_singleton()->draw_list_bind_render_pipeline( + p_draw_list, + giprobe_debug_shader_version_pipelines[giprobe_debug_pipeline].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, giprobe_debug_uniform_set, 0); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(GIProbeDebugPushConstant)); + RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, 36); +} + +void RendererSceneRenderRD::_debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + + if (!rb->sdfgi) { + return; //nothing to debug + } + + SDGIShader::DebugProbesPushConstant push_constant; + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + push_constant.projection[i * 4 + j] = p_camera_with_transform.matrix[i][j]; + } + } + + //gen spheres from strips + uint32_t band_points = 16; + push_constant.band_power = 4; + push_constant.sections_in_band = ((band_points / 2) - 1); + push_constant.band_mask = band_points - 2; + push_constant.section_arc = Math_TAU / float(push_constant.sections_in_band); + push_constant.y_mult = rb->sdfgi->y_mult; + + uint32_t total_points = push_constant.sections_in_band * band_points; + uint32_t total_probes = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.cascade = 0; + + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + + if (!rb->sdfgi->debug_probes_uniform_set.is_valid() || !RD::get_singleton()->uniform_set_is_valid(rb->sdfgi->debug_probes_uniform_set)) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(rb->sdfgi->cascades_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(rb->sdfgi->lightprobe_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(rb->sdfgi->occlusion_texture); + uniforms.push_back(u); + } + + rb->sdfgi->debug_probes_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.debug_probes.version_get_shader(sdfgi_shader.debug_probes_shader, 0), 0); + } + + RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, sdfgi_shader.debug_probes_pipeline[SDGIShader::PROBE_DEBUG_PROBES].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, rb->sdfgi->debug_probes_uniform_set, 0); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(SDGIShader::DebugProbesPushConstant)); + RD::get_singleton()->draw_list_draw(p_draw_list, false, total_probes, total_points); + + if (sdfgi_debug_probe_dir != Vector3()) { + print_line("CLICK DEBUG ME?"); + uint32_t cascade = 0; + Vector3 offset = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[cascade].position)) * rb->sdfgi->cascades[cascade].cell_size * Vector3(1.0, 1.0 / rb->sdfgi->y_mult, 1.0); + Vector3 probe_size = rb->sdfgi->cascades[cascade].cell_size * (rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR) * Vector3(1.0, 1.0 / rb->sdfgi->y_mult, 1.0); + Vector3 ray_from = sdfgi_debug_probe_pos; + Vector3 ray_to = sdfgi_debug_probe_pos + sdfgi_debug_probe_dir * rb->sdfgi->cascades[cascade].cell_size * Math::sqrt(3.0) * rb->sdfgi->cascade_size; + float sphere_radius = 0.2; + float closest_dist = 1e20; + sdfgi_debug_probe_enabled = false; + + Vector3i probe_from = rb->sdfgi->cascades[cascade].position / (rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR); + for (int i = 0; i < (SDFGI::PROBE_DIVISOR + 1); i++) { + for (int j = 0; j < (SDFGI::PROBE_DIVISOR + 1); j++) { + for (int k = 0; k < (SDFGI::PROBE_DIVISOR + 1); k++) { + Vector3 pos = offset + probe_size * Vector3(i, j, k); + Vector3 res; + if (Geometry3D::segment_intersects_sphere(ray_from, ray_to, pos, sphere_radius, &res)) { + float d = ray_from.distance_to(res); + if (d < closest_dist) { + closest_dist = d; + sdfgi_debug_probe_enabled = true; + sdfgi_debug_probe_index = probe_from + Vector3i(i, j, k); + } + } + } + } + } + + if (sdfgi_debug_probe_enabled) { + print_line("found: " + sdfgi_debug_probe_index); + } else { + print_line("no found"); + } + sdfgi_debug_probe_dir = Vector3(); + } + + if (sdfgi_debug_probe_enabled) { + uint32_t cascade = 0; + uint32_t probe_cells = (rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR); + Vector3i probe_from = rb->sdfgi->cascades[cascade].position / probe_cells; + Vector3i ofs = sdfgi_debug_probe_index - probe_from; + if (ofs.x < 0 || ofs.y < 0 || ofs.z < 0) { + return; + } + if (ofs.x > SDFGI::PROBE_DIVISOR || ofs.y > SDFGI::PROBE_DIVISOR || ofs.z > SDFGI::PROBE_DIVISOR) { + return; + } + + uint32_t mult = (SDFGI::PROBE_DIVISOR + 1); + uint32_t index = ofs.z * mult * mult + ofs.y * mult + ofs.x; + + push_constant.probe_debug_index = index; + + uint32_t cell_count = probe_cells * 2 * probe_cells * 2 * probe_cells * 2; + + RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, sdfgi_shader.debug_probes_pipeline[SDGIShader::PROBE_DEBUG_VISIBILITY].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, rb->sdfgi->debug_probes_uniform_set, 0); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(SDGIShader::DebugProbesPushConstant)); + RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, total_points); + } +} + +//////////////////////////////// +RID RendererSceneRenderRD::render_buffers_create() { + RenderBuffers rb; + rb.data = _create_render_buffer_data(); + return render_buffers_owner.make_rid(rb); +} + +void RendererSceneRenderRD::_allocate_blur_textures(RenderBuffers *rb) { + ERR_FAIL_COND(!rb->blur[0].texture.is_null()); + + uint32_t mipmaps_required = Image::get_image_required_mipmaps(rb->width, rb->height, Image::FORMAT_RGBAH); + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = rb->width; + tf.height = rb->height; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + tf.mipmaps = mipmaps_required; + + rb->blur[0].texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + //the second one is smaller (only used for separatable part of blur) + tf.width >>= 1; + tf.height >>= 1; + tf.mipmaps--; + rb->blur[1].texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + int base_width = rb->width; + int base_height = rb->height; + + for (uint32_t i = 0; i < mipmaps_required; i++) { + RenderBuffers::Blur::Mipmap mm; + mm.texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->blur[0].texture, 0, i); + + mm.width = base_width; + mm.height = base_height; + + rb->blur[0].mipmaps.push_back(mm); + + if (i > 0) { + mm.texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->blur[1].texture, 0, i - 1); + + rb->blur[1].mipmaps.push_back(mm); + } + + base_width = MAX(1, base_width >> 1); + base_height = MAX(1, base_height >> 1); + } +} + +void RendererSceneRenderRD::_allocate_luminance_textures(RenderBuffers *rb) { + ERR_FAIL_COND(!rb->luminance.current.is_null()); + + int w = rb->width; + int h = rb->height; + + while (true) { + w = MAX(w / 8, 1); + h = MAX(h / 8, 1); + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + tf.width = w; + tf.height = h; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; + + bool final = w == 1 && h == 1; + + if (final) { + tf.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT; + } + + RID texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + rb->luminance.reduce.push_back(texture); + + if (final) { + rb->luminance.current = RD::get_singleton()->texture_create(tf, RD::TextureView()); + break; + } + } +} + +void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { + if (rb->texture.is_valid()) { + RD::get_singleton()->free(rb->texture); + rb->texture = RID(); + } + + if (rb->depth_texture.is_valid()) { + RD::get_singleton()->free(rb->depth_texture); + rb->depth_texture = RID(); + } + + for (int i = 0; i < 2; i++) { + if (rb->blur[i].texture.is_valid()) { + RD::get_singleton()->free(rb->blur[i].texture); + rb->blur[i].texture = RID(); + rb->blur[i].mipmaps.clear(); + } + } + + for (int i = 0; i < rb->luminance.reduce.size(); i++) { + RD::get_singleton()->free(rb->luminance.reduce[i]); + } + + rb->luminance.reduce.clear(); + + if (rb->luminance.current.is_valid()) { + RD::get_singleton()->free(rb->luminance.current); + rb->luminance.current = RID(); + } + + if (rb->ssao.depth.is_valid()) { + RD::get_singleton()->free(rb->ssao.depth); + RD::get_singleton()->free(rb->ssao.ao_deinterleaved); + RD::get_singleton()->free(rb->ssao.ao_pong); + RD::get_singleton()->free(rb->ssao.ao_final); + + RD::get_singleton()->free(rb->ssao.importance_map[0]); + RD::get_singleton()->free(rb->ssao.importance_map[1]); + + rb->ssao.depth = RID(); + rb->ssao.ao_deinterleaved = RID(); + rb->ssao.ao_pong = RID(); + rb->ssao.ao_final = RID(); + rb->ssao.importance_map[0] = RID(); + rb->ssao.importance_map[1] = RID(); + rb->ssao.depth_slices.clear(); + rb->ssao.ao_deinterleaved_slices.clear(); + rb->ssao.ao_pong_slices.clear(); + } + + if (rb->ssr.blur_radius[0].is_valid()) { + RD::get_singleton()->free(rb->ssr.blur_radius[0]); + RD::get_singleton()->free(rb->ssr.blur_radius[1]); + rb->ssr.blur_radius[0] = RID(); + rb->ssr.blur_radius[1] = RID(); + } + + if (rb->ssr.depth_scaled.is_valid()) { + RD::get_singleton()->free(rb->ssr.depth_scaled); + rb->ssr.depth_scaled = RID(); + RD::get_singleton()->free(rb->ssr.normal_scaled); + rb->ssr.normal_scaled = RID(); + } + + if (rb->ambient_buffer.is_valid()) { + RD::get_singleton()->free(rb->ambient_buffer); + RD::get_singleton()->free(rb->reflection_buffer); + rb->ambient_buffer = RID(); + rb->reflection_buffer = RID(); + } +} + +void RendererSceneRenderRD::_process_sss(RID p_render_buffers, const CameraMatrix &p_camera) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + + bool can_use_effects = rb->width >= 8 && rb->height >= 8; + + if (!can_use_effects) { + //just copy + return; + } + + if (rb->blur[0].texture.is_null()) { + _allocate_blur_textures(rb); + _render_buffers_uniform_set_changed(p_render_buffers); + } + + storage->get_effects()->sub_surface_scattering(rb->texture, rb->blur[0].mipmaps[0].texture, rb->depth_texture, p_camera, Size2i(rb->width, rb->height), sss_scale, sss_depth_scale, sss_quality); +} + +void RendererSceneRenderRD::_process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + + bool can_use_effects = rb->width >= 8 && rb->height >= 8; + + if (!can_use_effects) { + //just copy + storage->get_effects()->merge_specular(p_dest_framebuffer, p_specular_buffer, p_use_additive ? RID() : rb->texture, RID()); + return; + } + + Environment *env = environment_owner.getornull(p_environment); + ERR_FAIL_COND(!env); + + ERR_FAIL_COND(!env->ssr_enabled); + + if (rb->ssr.depth_scaled.is_null()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + tf.width = rb->width / 2; + tf.height = rb->height / 2; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; + + rb->ssr.depth_scaled = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + + rb->ssr.normal_scaled = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + + if (ssr_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED && !rb->ssr.blur_radius[0].is_valid()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = rb->width / 2; + tf.height = rb->height / 2; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + + rb->ssr.blur_radius[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssr.blur_radius[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + + if (rb->blur[0].texture.is_null()) { + _allocate_blur_textures(rb); + _render_buffers_uniform_set_changed(p_render_buffers); + } + + storage->get_effects()->screen_space_reflection(rb->texture, p_normal_buffer, ssr_roughness_quality, rb->ssr.blur_radius[0], rb->ssr.blur_radius[1], p_metallic, p_metallic_mask, rb->depth_texture, rb->ssr.depth_scaled, rb->ssr.normal_scaled, rb->blur[0].mipmaps[1].texture, rb->blur[1].mipmaps[0].texture, Size2i(rb->width / 2, rb->height / 2), env->ssr_max_steps, env->ssr_fade_in, env->ssr_fade_out, env->ssr_depth_tolerance, p_projection); + storage->get_effects()->merge_specular(p_dest_framebuffer, p_specular_buffer, p_use_additive ? RID() : rb->texture, rb->blur[0].mipmaps[1].texture); +} + +void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + + Environment *env = environment_owner.getornull(p_environment); + ERR_FAIL_COND(!env); + + RENDER_TIMESTAMP("Process SSAO"); + + if (rb->ssao.ao_final.is_valid() && ssao_using_half_size != ssao_half_size) { + RD::get_singleton()->free(rb->ssao.depth); + RD::get_singleton()->free(rb->ssao.ao_deinterleaved); + RD::get_singleton()->free(rb->ssao.ao_pong); + RD::get_singleton()->free(rb->ssao.ao_final); + + RD::get_singleton()->free(rb->ssao.importance_map[0]); + RD::get_singleton()->free(rb->ssao.importance_map[1]); + + rb->ssao.depth = RID(); + rb->ssao.ao_deinterleaved = RID(); + rb->ssao.ao_pong = RID(); + rb->ssao.ao_final = RID(); + rb->ssao.importance_map[0] = RID(); + rb->ssao.importance_map[1] = RID(); + rb->ssao.depth_slices.clear(); + rb->ssao.ao_deinterleaved_slices.clear(); + rb->ssao.ao_pong_slices.clear(); + } + + int buffer_width; + int buffer_height; + int half_width; + int half_height; + if (ssao_half_size) { + buffer_width = (rb->width + 3) / 4; + buffer_height = (rb->height + 3) / 4; + half_width = (rb->width + 7) / 8; + half_height = (rb->height + 7) / 8; + } else { + buffer_width = (rb->width + 1) / 2; + buffer_height = (rb->height + 1) / 2; + half_width = (rb->width + 3) / 4; + half_height = (rb->height + 3) / 4; + } + bool uniform_sets_are_invalid = false; + if (rb->ssao.depth.is_null()) { + //allocate depth slices + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16_SFLOAT; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = buffer_width; + tf.height = buffer_height; + tf.mipmaps = 4; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.depth, "SSAO Depth"); + for (uint32_t i = 0; i < tf.mipmaps; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.depth, 0, i, RD::TEXTURE_SLICE_2D_ARRAY); + rb->ssao.depth_slices.push_back(slice); + RD::get_singleton()->set_resource_name(rb->ssao.depth_slices[i], "SSAO Depth Mip " + itos(i) + " "); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8_UNORM; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = buffer_width; + tf.height = buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.ao_deinterleaved = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.ao_deinterleaved, "SSAO De-interleaved Array"); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.ao_deinterleaved, i, 0); + rb->ssao.ao_deinterleaved_slices.push_back(slice); + RD::get_singleton()->set_resource_name(rb->ssao.ao_deinterleaved_slices[i], "SSAO De-interleaved Array Layer " + itos(i) + " "); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8_UNORM; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = buffer_width; + tf.height = buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.ao_pong = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.ao_pong, "SSAO De-interleaved Array Pong"); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.ao_pong, i, 0); + rb->ssao.ao_pong_slices.push_back(slice); + RD::get_singleton()->set_resource_name(rb->ssao.ao_deinterleaved_slices[i], "SSAO De-interleaved Array Layer " + itos(i) + " Pong"); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = half_width; + tf.height = half_height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.importance_map[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.importance_map[0], "SSAO Importance Map"); + rb->ssao.importance_map[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.importance_map[1], "SSAO Importance Map Pong"); + } + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = rb->width; + tf.height = rb->height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.ao_final = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rb->ssao.ao_final, "SSAO Final"); + _render_buffers_uniform_set_changed(p_render_buffers); + } + ssao_using_half_size = ssao_half_size; + uniform_sets_are_invalid = true; + } + + EffectsRD::SSAOSettings settings; + settings.radius = env->ssao_radius; + settings.intensity = env->ssao_intensity; + settings.power = env->ssao_power; + settings.detail = env->ssao_detail; + settings.horizon = env->ssao_horizon; + settings.sharpness = env->ssao_sharpness; + + settings.quality = ssao_quality; + settings.half_size = ssao_half_size; + settings.adaptive_target = ssao_adaptive_target; + settings.blur_passes = ssao_blur_passes; + settings.fadeout_from = ssao_fadeout_from; + settings.fadeout_to = ssao_fadeout_to; + settings.full_screen_size = Size2i(rb->width, rb->height); + settings.half_screen_size = Size2i(buffer_width, buffer_height); + settings.quarter_screen_size = Size2i(half_width, half_height); + + storage->get_effects()->generate_ssao(rb->depth_texture, p_normal_buffer, rb->ssao.depth, rb->ssao.depth_slices, rb->ssao.ao_deinterleaved, rb->ssao.ao_deinterleaved_slices, rb->ssao.ao_pong, rb->ssao.ao_pong_slices, rb->ssao.ao_final, rb->ssao.importance_map[0], rb->ssao.importance_map[1], p_projection, settings, uniform_sets_are_invalid); +} + +void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(RID p_render_buffers, RID p_environment, RID p_camera_effects, const CameraMatrix &p_projection) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + + Environment *env = environment_owner.getornull(p_environment); + //glow (if enabled) + CameraEffects *camfx = camera_effects_owner.getornull(p_camera_effects); + + bool can_use_effects = rb->width >= 8 && rb->height >= 8; + + if (can_use_effects && camfx && (camfx->dof_blur_near_enabled || camfx->dof_blur_far_enabled) && camfx->dof_blur_amount > 0.0) { + if (rb->blur[0].texture.is_null()) { + _allocate_blur_textures(rb); + _render_buffers_uniform_set_changed(p_render_buffers); + } + + float bokeh_size = camfx->dof_blur_amount * 64.0; + storage->get_effects()->bokeh_dof(rb->texture, rb->depth_texture, Size2i(rb->width, rb->height), rb->blur[0].mipmaps[0].texture, rb->blur[1].mipmaps[0].texture, rb->blur[0].mipmaps[1].texture, camfx->dof_blur_far_enabled, camfx->dof_blur_far_distance, camfx->dof_blur_far_transition, camfx->dof_blur_near_enabled, camfx->dof_blur_near_distance, camfx->dof_blur_near_transition, bokeh_size, dof_blur_bokeh_shape, dof_blur_quality, dof_blur_use_jitter, p_projection.get_z_near(), p_projection.get_z_far(), p_projection.is_orthogonal()); + } + + if (can_use_effects && env && env->auto_exposure) { + if (rb->luminance.current.is_null()) { + _allocate_luminance_textures(rb); + _render_buffers_uniform_set_changed(p_render_buffers); + } + + bool set_immediate = env->auto_exposure_version != rb->auto_exposure_version; + rb->auto_exposure_version = env->auto_exposure_version; + + double step = env->auto_exp_speed * time_step; + storage->get_effects()->luminance_reduction(rb->texture, Size2i(rb->width, rb->height), rb->luminance.reduce, rb->luminance.current, env->min_luminance, env->max_luminance, step, set_immediate); + + //swap final reduce with prev luminance + SWAP(rb->luminance.current, rb->luminance.reduce.write[rb->luminance.reduce.size() - 1]); + RenderingServerDefault::redraw_request(); //redraw all the time if auto exposure rendering is on + } + + int max_glow_level = -1; + + if (can_use_effects && env && env->glow_enabled) { + /* see that blur textures are allocated */ + + if (rb->blur[1].texture.is_null()) { + _allocate_blur_textures(rb); + _render_buffers_uniform_set_changed(p_render_buffers); + } + + for (int i = 0; i < RS::MAX_GLOW_LEVELS; i++) { + if (env->glow_levels[i] > 0.0) { + if (i >= rb->blur[1].mipmaps.size()) { + max_glow_level = rb->blur[1].mipmaps.size() - 1; + } else { + max_glow_level = i; + } + } + } + + for (int i = 0; i < (max_glow_level + 1); i++) { + int vp_w = rb->blur[1].mipmaps[i].width; + int vp_h = rb->blur[1].mipmaps[i].height; + + if (i == 0) { + RID luminance_texture; + if (env->auto_exposure && rb->luminance.current.is_valid()) { + luminance_texture = rb->luminance.current; + } + storage->get_effects()->gaussian_glow(rb->texture, rb->blur[1].mipmaps[i].texture, Size2i(vp_w, vp_h), env->glow_strength, glow_high_quality, true, env->glow_hdr_luminance_cap, env->exposure, env->glow_bloom, env->glow_hdr_bleed_threshold, env->glow_hdr_bleed_scale, luminance_texture, env->auto_exp_scale); + } else { + storage->get_effects()->gaussian_glow(rb->blur[1].mipmaps[i - 1].texture, rb->blur[1].mipmaps[i].texture, Size2i(vp_w, vp_h), env->glow_strength, glow_high_quality); + } + } + } + + { + //tonemap + EffectsRD::TonemapSettings tonemap; + + if (can_use_effects && env && env->auto_exposure && rb->luminance.current.is_valid()) { + tonemap.use_auto_exposure = true; + tonemap.exposure_texture = rb->luminance.current; + tonemap.auto_exposure_grey = env->auto_exp_scale; + } else { + tonemap.exposure_texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + } + + if (can_use_effects && env && env->glow_enabled) { + tonemap.use_glow = true; + tonemap.glow_mode = EffectsRD::TonemapSettings::GlowMode(env->glow_blend_mode); + tonemap.glow_intensity = env->glow_blend_mode == RS::ENV_GLOW_BLEND_MODE_MIX ? env->glow_mix : env->glow_intensity; + for (int i = 0; i < RS::MAX_GLOW_LEVELS; i++) { + tonemap.glow_levels[i] = env->glow_levels[i]; + } + tonemap.glow_texture_size.x = rb->blur[1].mipmaps[0].width; + tonemap.glow_texture_size.y = rb->blur[1].mipmaps[0].height; + tonemap.glow_use_bicubic_upscale = glow_bicubic_upscale; + tonemap.glow_texture = rb->blur[1].texture; + } else { + tonemap.glow_texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); + } + + if (rb->screen_space_aa == RS::VIEWPORT_SCREEN_SPACE_AA_FXAA) { + tonemap.use_fxaa = true; + } + + tonemap.use_debanding = rb->use_debanding; + tonemap.texture_size = Vector2i(rb->width, rb->height); + + if (env) { + tonemap.tonemap_mode = env->tone_mapper; + tonemap.white = env->white; + tonemap.exposure = env->exposure; + } + + tonemap.use_color_correction = false; + tonemap.use_1d_color_correction = false; + tonemap.color_correction_texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); + + if (can_use_effects && env) { + tonemap.use_bcs = env->adjustments_enabled; + tonemap.brightness = env->adjustments_brightness; + tonemap.contrast = env->adjustments_contrast; + tonemap.saturation = env->adjustments_saturation; + if (env->adjustments_enabled && env->color_correction.is_valid()) { + tonemap.use_color_correction = true; + tonemap.use_1d_color_correction = env->use_1d_color_correction; + tonemap.color_correction_texture = storage->texture_get_rd_texture(env->color_correction); + } + } + + storage->get_effects()->tonemapper(rb->texture, storage->render_target_get_rd_framebuffer(rb->render_target), tonemap); + } + + storage->render_target_disable_clear_request(rb->render_target); +} + +void RendererSceneRenderRD::_render_buffers_debug_draw(RID p_render_buffers, RID p_shadow_atlas) { + EffectsRD *effects = storage->get_effects(); + + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SHADOW_ATLAS) { + if (p_shadow_atlas.is_valid()) { + RID shadow_atlas_texture = shadow_atlas_get_texture(p_shadow_atlas); + Size2 rtsize = storage->render_target_get_size(rb->render_target); + + effects->copy_to_fb_rect(shadow_atlas_texture, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2i(Vector2(), rtsize / 2), false, true); + } + } + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_DIRECTIONAL_SHADOW_ATLAS) { + if (directional_shadow_get_texture().is_valid()) { + RID shadow_atlas_texture = directional_shadow_get_texture(); + Size2 rtsize = storage->render_target_get_size(rb->render_target); + + effects->copy_to_fb_rect(shadow_atlas_texture, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2i(Vector2(), rtsize / 2), false, true); + } + } + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_DECAL_ATLAS) { + RID decal_atlas = storage->decal_atlas_get_texture(); + + if (decal_atlas.is_valid()) { + Size2 rtsize = storage->render_target_get_size(rb->render_target); + + effects->copy_to_fb_rect(decal_atlas, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2i(Vector2(), rtsize / 2), false, false, true); + } + } + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SCENE_LUMINANCE) { + if (rb->luminance.current.is_valid()) { + Size2 rtsize = storage->render_target_get_size(rb->render_target); + + effects->copy_to_fb_rect(rb->luminance.current, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize / 8), false, true); + } + } + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SSAO && rb->ssao.ao_final.is_valid()) { + Size2 rtsize = storage->render_target_get_size(rb->render_target); + RID ao_buf = rb->ssao.ao_final; + effects->copy_to_fb_rect(ao_buf, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, true); + } + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER && _render_buffers_get_normal_texture(p_render_buffers).is_valid()) { + Size2 rtsize = storage->render_target_get_size(rb->render_target); + effects->copy_to_fb_rect(_render_buffers_get_normal_texture(p_render_buffers), storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false); + } + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_GI_BUFFER && rb->ambient_buffer.is_valid()) { + Size2 rtsize = storage->render_target_get_size(rb->render_target); + RID ambient_texture = rb->ambient_buffer; + RID reflection_texture = rb->reflection_buffer; + effects->copy_to_fb_rect(ambient_texture, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false, false, true, reflection_texture); + } +} + +void RendererSceneRenderRD::environment_set_adjustment(RID p_env, bool p_enable, float p_brightness, float p_contrast, float p_saturation, bool p_use_1d_color_correction, RID p_color_correction) { + Environment *env = environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + env->adjustments_enabled = p_enable; + env->adjustments_brightness = p_brightness; + env->adjustments_contrast = p_contrast; + env->adjustments_saturation = p_saturation; + env->use_1d_color_correction = p_use_1d_color_correction; + env->color_correction = p_color_correction; +} + +void RendererSceneRenderRD::_sdfgi_debug_draw(RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + + if (!rb->sdfgi) { + return; //eh + } + + if (!rb->sdfgi->debug_uniform_set.is_valid() || !RD::get_singleton()->uniform_set_is_valid(rb->sdfgi->debug_uniform_set)) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[i].sdf_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[i].light_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[i].light_aniso_0_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < rb->sdfgi->cascades.size()) { + u.ids.push_back(rb->sdfgi->cascades[i].light_aniso_1_tex); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(rb->sdfgi->occlusion_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 9; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(rb->sdfgi->cascades_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 10; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.ids.push_back(rb->texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 11; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(rb->sdfgi->lightprobe_texture); + uniforms.push_back(u); + } + rb->sdfgi->debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.debug_shader_version, 0); + } + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.debug_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->debug_uniform_set, 0); + + SDGIShader::DebugPushConstant push_constant; + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.screen_size[0] = rb->width; + push_constant.screen_size[1] = rb->height; + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.use_occlusion = rb->sdfgi->uses_occlusion; + push_constant.y_mult = rb->sdfgi->y_mult; + + Vector2 vp_half = p_projection.get_viewport_half_extents(); + push_constant.cam_extent[0] = vp_half.x; + push_constant.cam_extent[1] = vp_half.y; + push_constant.cam_extent[2] = -p_projection.get_z_near(); + + push_constant.cam_transform[0] = p_transform.basis.elements[0][0]; + push_constant.cam_transform[1] = p_transform.basis.elements[1][0]; + push_constant.cam_transform[2] = p_transform.basis.elements[2][0]; + push_constant.cam_transform[3] = 0; + push_constant.cam_transform[4] = p_transform.basis.elements[0][1]; + push_constant.cam_transform[5] = p_transform.basis.elements[1][1]; + push_constant.cam_transform[6] = p_transform.basis.elements[2][1]; + push_constant.cam_transform[7] = 0; + push_constant.cam_transform[8] = p_transform.basis.elements[0][2]; + push_constant.cam_transform[9] = p_transform.basis.elements[1][2]; + push_constant.cam_transform[10] = p_transform.basis.elements[2][2]; + push_constant.cam_transform[11] = 0; + push_constant.cam_transform[12] = p_transform.origin.x; + push_constant.cam_transform[13] = p_transform.origin.y; + push_constant.cam_transform[14] = p_transform.origin.z; + push_constant.cam_transform[15] = 1; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DebugPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); + RD::get_singleton()->compute_list_end(); + + Size2 rtsize = storage->render_target_get_size(rb->render_target); + storage->get_effects()->copy_to_fb_rect(rb->texture, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), true); +} + +RID RendererSceneRenderRD::render_buffers_get_back_buffer_texture(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + if (!rb->blur[0].texture.is_valid()) { + return RID(); //not valid at the moment + } + return rb->blur[0].texture; +} + +RID RendererSceneRenderRD::render_buffers_get_ao_texture(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + + return rb->ssao.ao_final; +} + +RID RendererSceneRenderRD::render_buffers_get_gi_probe_buffer(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + if (rb->giprobe_buffer.is_null()) { + rb->giprobe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GI::GIProbeData) * RenderBuffers::MAX_GIPROBES); + } + return rb->giprobe_buffer; +} + +RID RendererSceneRenderRD::render_buffers_get_default_gi_probe_buffer() { + return default_giprobe_buffer; +} + +RID RendererSceneRenderRD::render_buffers_get_gi_ambient_texture(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + return rb->ambient_buffer; +} +RID RendererSceneRenderRD::render_buffers_get_gi_reflection_texture(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + return rb->reflection_buffer; +} + +uint32_t RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_count(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, 0); + ERR_FAIL_COND_V(!rb->sdfgi, 0); + + return rb->sdfgi->cascades.size(); +} +bool RendererSceneRenderRD::render_buffers_is_sdfgi_enabled(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, false); + + return rb->sdfgi != nullptr; +} +RID RendererSceneRenderRD::render_buffers_get_sdfgi_irradiance_probes(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + ERR_FAIL_COND_V(!rb->sdfgi, RID()); + + return rb->sdfgi->lightprobe_texture; +} + +Vector3 RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_offset(RID p_render_buffers, uint32_t p_cascade) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, Vector3()); + ERR_FAIL_COND_V(!rb->sdfgi, Vector3()); + ERR_FAIL_UNSIGNED_INDEX_V(p_cascade, rb->sdfgi->cascades.size(), Vector3()); + + return Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[p_cascade].position)) * rb->sdfgi->cascades[p_cascade].cell_size; +} + +Vector3i RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_probe_offset(RID p_render_buffers, uint32_t p_cascade) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, Vector3i()); + ERR_FAIL_COND_V(!rb->sdfgi, Vector3i()); + ERR_FAIL_UNSIGNED_INDEX_V(p_cascade, rb->sdfgi->cascades.size(), Vector3i()); + int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + + return rb->sdfgi->cascades[p_cascade].position / probe_divisor; +} + +float RendererSceneRenderRD::render_buffers_get_sdfgi_normal_bias(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, 0); + ERR_FAIL_COND_V(!rb->sdfgi, 0); + + return rb->sdfgi->normal_bias; +} +float RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_probe_size(RID p_render_buffers, uint32_t p_cascade) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, 0); + ERR_FAIL_COND_V(!rb->sdfgi, 0); + ERR_FAIL_UNSIGNED_INDEX_V(p_cascade, rb->sdfgi->cascades.size(), 0); + + return float(rb->sdfgi->cascade_size) * rb->sdfgi->cascades[p_cascade].cell_size / float(rb->sdfgi->probe_axis_count - 1); +} +uint32_t RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_probe_count(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, 0); + ERR_FAIL_COND_V(!rb->sdfgi, 0); + + return rb->sdfgi->probe_axis_count; +} + +uint32_t RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_size(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, 0); + ERR_FAIL_COND_V(!rb->sdfgi, 0); + + return rb->sdfgi->cascade_size; +} + +bool RendererSceneRenderRD::render_buffers_is_sdfgi_using_occlusion(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, false); + ERR_FAIL_COND_V(!rb->sdfgi, false); + + return rb->sdfgi->uses_occlusion; +} + +float RendererSceneRenderRD::render_buffers_get_sdfgi_energy(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, 0.0); + ERR_FAIL_COND_V(!rb->sdfgi, 0.0); + + return rb->sdfgi->energy; +} +RID RendererSceneRenderRD::render_buffers_get_sdfgi_occlusion_texture(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + ERR_FAIL_COND_V(!rb->sdfgi, RID()); + + return rb->sdfgi->occlusion_texture; +} + +bool RendererSceneRenderRD::render_buffers_has_volumetric_fog(RID p_render_buffers) const { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, false); + + return rb->volumetric_fog != nullptr; +} +RID RendererSceneRenderRD::render_buffers_get_volumetric_fog_texture(RID p_render_buffers) { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb || !rb->volumetric_fog, RID()); + + return rb->volumetric_fog->fog_map; +} + +RID RendererSceneRenderRD::render_buffers_get_volumetric_fog_sky_uniform_set(RID p_render_buffers) { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, RID()); + + if (!rb->volumetric_fog) { + return RID(); + } + + return rb->volumetric_fog->sky_uniform_set; +} + +float RendererSceneRenderRD::render_buffers_get_volumetric_fog_end(RID p_render_buffers) { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb || !rb->volumetric_fog, 0); + return rb->volumetric_fog->length; +} +float RendererSceneRenderRD::render_buffers_get_volumetric_fog_detail_spread(RID p_render_buffers) { + const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb || !rb->volumetric_fog, 0); + return rb->volumetric_fog->spread; +} + +void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_width, int p_height, RS::ViewportMSAA p_msaa, RenderingServer::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + rb->width = p_width; + rb->height = p_height; + rb->render_target = p_render_target; + rb->msaa = p_msaa; + rb->screen_space_aa = p_screen_space_aa; + rb->use_debanding = p_use_debanding; + if (rb->cluster_builder == nullptr) { + rb->cluster_builder = memnew(ClusterBuilderRD); + } + rb->cluster_builder->set_shared(&cluster_builder_shared); + + _free_render_buffer_data(rb); + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = rb->width; + tf.height = rb->height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { + tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + } else { + tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + + rb->texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + + { + RD::TextureFormat tf; + if (rb->msaa == RS::VIEWPORT_MSAA_DISABLED) { + tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D24_UNORM_S8_UINT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D24_UNORM_S8_UINT : RD::DATA_FORMAT_D32_SFLOAT_S8_UINT; + } else { + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + } + + tf.width = p_width; + tf.height = p_height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + + if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { + tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + } else { + tf.usage_bits |= RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + + rb->depth_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + + rb->data->configure(rb->texture, rb->depth_texture, p_width, p_height, p_msaa); + _render_buffers_uniform_set_changed(p_render_buffers); + + rb->cluster_builder->setup(Size2i(p_width, p_height), max_cluster_elements, rb->depth_texture, storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED), rb->texture); +} + +void RendererSceneRenderRD::gi_set_use_half_resolution(bool p_enable) { + gi.half_resolution = p_enable; +} + +void RendererSceneRenderRD::sub_surface_scattering_set_quality(RS::SubSurfaceScatteringQuality p_quality) { + sss_quality = p_quality; +} + +RS::SubSurfaceScatteringQuality RendererSceneRenderRD::sub_surface_scattering_get_quality() const { + return sss_quality; +} + +void RendererSceneRenderRD::sub_surface_scattering_set_scale(float p_scale, float p_depth_scale) { + sss_scale = p_scale; + sss_depth_scale = p_depth_scale; +} + +void RendererSceneRenderRD::shadows_quality_set(RS::ShadowQuality p_quality) { + ERR_FAIL_INDEX_MSG(p_quality, RS::SHADOW_QUALITY_MAX, "Shadow quality too high, please see RenderingServer's ShadowQuality enum"); + + if (shadows_quality != p_quality) { + shadows_quality = p_quality; + + switch (shadows_quality) { + case RS::SHADOW_QUALITY_HARD: { + penumbra_shadow_samples = 4; + soft_shadow_samples = 1; + shadows_quality_radius = 1.0; + } break; + case RS::SHADOW_QUALITY_SOFT_LOW: { + penumbra_shadow_samples = 8; + soft_shadow_samples = 4; + shadows_quality_radius = 2.0; + } break; + case RS::SHADOW_QUALITY_SOFT_MEDIUM: { + penumbra_shadow_samples = 12; + soft_shadow_samples = 8; + shadows_quality_radius = 2.0; + } break; + case RS::SHADOW_QUALITY_SOFT_HIGH: { + penumbra_shadow_samples = 24; + soft_shadow_samples = 16; + shadows_quality_radius = 3.0; + } break; + case RS::SHADOW_QUALITY_SOFT_ULTRA: { + penumbra_shadow_samples = 32; + soft_shadow_samples = 32; + shadows_quality_radius = 4.0; + } break; + case RS::SHADOW_QUALITY_MAX: + break; + } + get_vogel_disk(penumbra_shadow_kernel, penumbra_shadow_samples); + get_vogel_disk(soft_shadow_kernel, soft_shadow_samples); + } +} + +void RendererSceneRenderRD::directional_shadow_quality_set(RS::ShadowQuality p_quality) { + ERR_FAIL_INDEX_MSG(p_quality, RS::SHADOW_QUALITY_MAX, "Shadow quality too high, please see RenderingServer's ShadowQuality enum"); + + if (directional_shadow_quality != p_quality) { + directional_shadow_quality = p_quality; + + switch (directional_shadow_quality) { + case RS::SHADOW_QUALITY_HARD: { + directional_penumbra_shadow_samples = 4; + directional_soft_shadow_samples = 1; + directional_shadow_quality_radius = 1.0; + } break; + case RS::SHADOW_QUALITY_SOFT_LOW: { + directional_penumbra_shadow_samples = 8; + directional_soft_shadow_samples = 4; + directional_shadow_quality_radius = 2.0; + } break; + case RS::SHADOW_QUALITY_SOFT_MEDIUM: { + directional_penumbra_shadow_samples = 12; + directional_soft_shadow_samples = 8; + directional_shadow_quality_radius = 2.0; + } break; + case RS::SHADOW_QUALITY_SOFT_HIGH: { + directional_penumbra_shadow_samples = 24; + directional_soft_shadow_samples = 16; + directional_shadow_quality_radius = 3.0; + } break; + case RS::SHADOW_QUALITY_SOFT_ULTRA: { + directional_penumbra_shadow_samples = 32; + directional_soft_shadow_samples = 32; + directional_shadow_quality_radius = 4.0; + } break; + case RS::SHADOW_QUALITY_MAX: + break; + } + get_vogel_disk(directional_penumbra_shadow_kernel, directional_penumbra_shadow_samples); + get_vogel_disk(directional_soft_shadow_kernel, directional_soft_shadow_samples); + } +} + +int RendererSceneRenderRD::get_roughness_layers() const { + return roughness_layers; +} + +bool RendererSceneRenderRD::is_using_radiance_cubemap_array() const { + return sky_use_cubemap_array; +} + +RendererSceneRenderRD::RenderBufferData *RendererSceneRenderRD::render_buffers_get_data(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND_V(!rb, nullptr); + return rb->data; +} + +void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment) { + cluster.reflection_count = 0; + + for (uint32_t i = 0; i < (uint32_t)p_reflections.size(); i++) { + if (cluster.reflection_count == cluster.max_reflections) { + break; + } + + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflections[i]); + if (!rpi) { + continue; + } + + cluster.reflection_sort[cluster.reflection_count].instance = rpi; + cluster.reflection_sort[cluster.reflection_count].depth = -p_camera_inverse_transform.xform(rpi->transform.origin).z; + cluster.reflection_count++; + } + + if (cluster.reflection_count > 0) { + SortArray<Cluster::InstanceSort<ReflectionProbeInstance>> sort_array; + sort_array.sort(cluster.reflection_sort, cluster.reflection_count); + } + + for (uint32_t i = 0; i < cluster.reflection_count; i++) { + ReflectionProbeInstance *rpi = cluster.reflection_sort[i].instance; + + rpi->render_index = i; + + RID base_probe = rpi->probe; + + Cluster::ReflectionData &reflection_ubo = cluster.reflections[i]; + + Vector3 extents = storage->reflection_probe_get_extents(base_probe); + + reflection_ubo.box_extents[0] = extents.x; + reflection_ubo.box_extents[1] = extents.y; + reflection_ubo.box_extents[2] = extents.z; + reflection_ubo.index = rpi->atlas_index; + + Vector3 origin_offset = storage->reflection_probe_get_origin_offset(base_probe); + + reflection_ubo.box_offset[0] = origin_offset.x; + reflection_ubo.box_offset[1] = origin_offset.y; + reflection_ubo.box_offset[2] = origin_offset.z; + reflection_ubo.mask = storage->reflection_probe_get_cull_mask(base_probe); + + reflection_ubo.intensity = storage->reflection_probe_get_intensity(base_probe); + reflection_ubo.ambient_mode = storage->reflection_probe_get_ambient_mode(base_probe); + + reflection_ubo.exterior = !storage->reflection_probe_is_interior(base_probe); + reflection_ubo.box_project = storage->reflection_probe_is_box_projection(base_probe); + + Color ambient_linear = storage->reflection_probe_get_ambient_color(base_probe).to_linear(); + float interior_ambient_energy = storage->reflection_probe_get_ambient_color_energy(base_probe); + reflection_ubo.ambient[0] = ambient_linear.r * interior_ambient_energy; + reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy; + reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy; + + Transform transform = rpi->transform; + Transform proj = (p_camera_inverse_transform * transform).inverse(); + RendererStorageRD::store_transform(proj, reflection_ubo.local_matrix); + + current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_REFLECTION_PROBE, transform, extents); + + rpi->last_pass = RSG::rasterizer->get_frame_number(); + } + + if (cluster.reflection_count) { + RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } +} + +void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) { + Transform inverse_transform = p_camera_transform.affine_inverse(); + + r_directional_light_count = 0; + r_positional_light_count = 0; + sky_scene_state.ubo.directional_light_count = 0; + + Plane camera_plane(p_camera_transform.origin, -p_camera_transform.basis.get_axis(Vector3::AXIS_Z).normalized()); + + cluster.omni_light_count = 0; + cluster.spot_light_count = 0; + + for (int i = 0; i < (int)p_lights.size(); i++) { + LightInstance *li = light_instance_owner.getornull(p_lights[i]); + if (!li) { + continue; + } + RID base = li->light; + + ERR_CONTINUE(base.is_null()); + + RS::LightType type = storage->light_get_type(base); + switch (type) { + case RS::LIGHT_DIRECTIONAL: { + // Copy to SkyDirectionalLightData + if (r_directional_light_count < sky_scene_state.max_directional_lights) { + SkyDirectionalLightData &sky_light_data = sky_scene_state.directional_lights[r_directional_light_count]; + Transform light_transform = li->transform; + Vector3 world_direction = light_transform.basis.xform(Vector3(0, 0, 1)).normalized(); + + sky_light_data.direction[0] = world_direction.x; + sky_light_data.direction[1] = world_direction.y; + sky_light_data.direction[2] = -world_direction.z; + + float sign = storage->light_is_negative(base) ? -1 : 1; + sky_light_data.energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY); + + Color linear_col = storage->light_get_color(base).to_linear(); + sky_light_data.color[0] = linear_col.r; + sky_light_data.color[1] = linear_col.g; + sky_light_data.color[2] = linear_col.b; + + sky_light_data.enabled = true; + + float angular_diameter = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); + if (angular_diameter > 0.0) { + // I know tan(0) is 0, but let's not risk it with numerical precision. + // technically this will keep expanding until reaching the sun, but all we care + // is expand until we reach the radius of the near plane (there can't be more occluders than that) + angular_diameter = Math::tan(Math::deg2rad(angular_diameter)); + } else { + angular_diameter = 0.0; + } + sky_light_data.size = angular_diameter; + sky_scene_state.ubo.directional_light_count++; + } + + if (r_directional_light_count >= cluster.max_directional_lights || storage->light_directional_is_sky_only(base)) { + continue; + } + + Cluster::DirectionalLightData &light_data = cluster.directional_lights[r_directional_light_count]; + + Transform light_transform = li->transform; + + Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, 1))).normalized(); + + light_data.direction[0] = direction.x; + light_data.direction[1] = direction.y; + light_data.direction[2] = direction.z; + + float sign = storage->light_is_negative(base) ? -1 : 1; + + light_data.energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI; + + Color linear_col = storage->light_get_color(base).to_linear(); + light_data.color[0] = linear_col.r; + light_data.color[1] = linear_col.g; + light_data.color[2] = linear_col.b; + + light_data.specular = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR); + light_data.mask = storage->light_get_cull_mask(base); + + float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); + + light_data.size = 1.0 - Math::cos(Math::deg2rad(size)); //angle to cosine offset + + Color shadow_col = storage->light_get_shadow_color(base).to_linear(); + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_PSSM_SPLITS) { + light_data.shadow_color1[0] = 1.0; + light_data.shadow_color1[1] = 0.0; + light_data.shadow_color1[2] = 0.0; + light_data.shadow_color1[3] = 1.0; + light_data.shadow_color2[0] = 0.0; + light_data.shadow_color2[1] = 1.0; + light_data.shadow_color2[2] = 0.0; + light_data.shadow_color2[3] = 1.0; + light_data.shadow_color3[0] = 0.0; + light_data.shadow_color3[1] = 0.0; + light_data.shadow_color3[2] = 1.0; + light_data.shadow_color3[3] = 1.0; + light_data.shadow_color4[0] = 1.0; + light_data.shadow_color4[1] = 1.0; + light_data.shadow_color4[2] = 0.0; + light_data.shadow_color4[3] = 1.0; + + } else { + light_data.shadow_color1[0] = shadow_col.r; + light_data.shadow_color1[1] = shadow_col.g; + light_data.shadow_color1[2] = shadow_col.b; + light_data.shadow_color1[3] = 1.0; + light_data.shadow_color2[0] = shadow_col.r; + light_data.shadow_color2[1] = shadow_col.g; + light_data.shadow_color2[2] = shadow_col.b; + light_data.shadow_color2[3] = 1.0; + light_data.shadow_color3[0] = shadow_col.r; + light_data.shadow_color3[1] = shadow_col.g; + light_data.shadow_color3[2] = shadow_col.b; + light_data.shadow_color3[3] = 1.0; + light_data.shadow_color4[0] = shadow_col.r; + light_data.shadow_color4[1] = shadow_col.g; + light_data.shadow_color4[2] = shadow_col.b; + light_data.shadow_color4[3] = 1.0; + } + + light_data.shadow_enabled = p_using_shadows && storage->light_has_shadow(base); + + float angular_diameter = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); + if (angular_diameter > 0.0) { + // I know tan(0) is 0, but let's not risk it with numerical precision. + // technically this will keep expanding until reaching the sun, but all we care + // is expand until we reach the radius of the near plane (there can't be more occluders than that) + angular_diameter = Math::tan(Math::deg2rad(angular_diameter)); + } else { + angular_diameter = 0.0; + } + + if (light_data.shadow_enabled) { + RS::LightDirectionalShadowMode smode = storage->light_directional_get_shadow_mode(base); + + int limit = smode == RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL ? 0 : (smode == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS ? 1 : 3); + light_data.blend_splits = storage->light_directional_get_blend_splits(base); + for (int j = 0; j < 4; j++) { + Rect2 atlas_rect = li->shadow_transform[j].atlas_rect; + CameraMatrix matrix = li->shadow_transform[j].camera; + float split = li->shadow_transform[MIN(limit, j)].split; + + CameraMatrix bias; + bias.set_light_bias(); + CameraMatrix rectm; + rectm.set_light_atlas_rect(atlas_rect); + + Transform modelview = (inverse_transform * li->shadow_transform[j].transform).inverse(); + + CameraMatrix shadow_mtx = rectm * bias * matrix * modelview; + light_data.shadow_split_offsets[j] = split; + float bias_scale = li->shadow_transform[j].bias_scale; + light_data.shadow_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * bias_scale; + light_data.shadow_normal_bias[j] = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * li->shadow_transform[j].shadow_texel_size; + light_data.shadow_transmittance_bias[j] = storage->light_get_transmittance_bias(base) * bias_scale; + light_data.shadow_z_range[j] = li->shadow_transform[j].farplane; + light_data.shadow_range_begin[j] = li->shadow_transform[j].range_begin; + RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrices[j]); + + Vector2 uv_scale = li->shadow_transform[j].uv_scale; + uv_scale *= atlas_rect.size; //adapt to atlas size + switch (j) { + case 0: { + light_data.uv_scale1[0] = uv_scale.x; + light_data.uv_scale1[1] = uv_scale.y; + } break; + case 1: { + light_data.uv_scale2[0] = uv_scale.x; + light_data.uv_scale2[1] = uv_scale.y; + } break; + case 2: { + light_data.uv_scale3[0] = uv_scale.x; + light_data.uv_scale3[1] = uv_scale.y; + } break; + case 3: { + light_data.uv_scale4[0] = uv_scale.x; + light_data.uv_scale4[1] = uv_scale.y; + } break; + } + } + + float fade_start = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_FADE_START); + light_data.fade_from = -light_data.shadow_split_offsets[3] * MIN(fade_start, 0.999); //using 1.0 would break smoothstep + light_data.fade_to = -light_data.shadow_split_offsets[3]; + light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base); + + light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR); + light_data.softshadow_angle = angular_diameter; + + if (angular_diameter <= 0.0) { + light_data.soft_shadow_scale *= directional_shadow_quality_radius_get(); // Only use quality radius for PCF + } + } + + r_directional_light_count++; + } break; + case RS::LIGHT_OMNI: { + if (cluster.omni_light_count >= cluster.max_lights) { + continue; + } + + cluster.omni_light_sort[cluster.omni_light_count].instance = li; + cluster.omni_light_sort[cluster.omni_light_count].depth = camera_plane.distance_to(li->transform.origin); + cluster.omni_light_count++; + } break; + case RS::LIGHT_SPOT: { + if (cluster.spot_light_count >= cluster.max_lights) { + continue; + } + + cluster.spot_light_sort[cluster.spot_light_count].instance = li; + cluster.spot_light_sort[cluster.spot_light_count].depth = camera_plane.distance_to(li->transform.origin); + cluster.spot_light_count++; + } break; + } + + li->last_pass = RSG::rasterizer->get_frame_number(); + } + + if (cluster.omni_light_count) { + SortArray<Cluster::InstanceSort<LightInstance>> sorter; + sorter.sort(cluster.omni_light_sort, cluster.omni_light_count); + } + + if (cluster.spot_light_count) { + SortArray<Cluster::InstanceSort<LightInstance>> sorter; + sorter.sort(cluster.spot_light_sort, cluster.spot_light_count); + } + + ShadowAtlas *shadow_atlas = nullptr; + + if (p_shadow_atlas.is_valid() && p_using_shadows) { + shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + } + + for (uint32_t i = 0; i < (cluster.omni_light_count + cluster.spot_light_count); i++) { + uint32_t index = (i < cluster.omni_light_count) ? i : i - (cluster.omni_light_count); + Cluster::LightData &light_data = (i < cluster.omni_light_count) ? cluster.omni_lights[index] : cluster.spot_lights[index]; + RS::LightType type = (i < cluster.omni_light_count) ? RS::LIGHT_OMNI : RS::LIGHT_SPOT; + LightInstance *li = (i < cluster.omni_light_count) ? cluster.omni_light_sort[index].instance : cluster.spot_light_sort[index].instance; + RID base = li->light; + + Transform light_transform = li->transform; + + float sign = storage->light_is_negative(base) ? -1 : 1; + Color linear_col = storage->light_get_color(base).to_linear(); + + light_data.attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION); + + float energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI; + + light_data.color[0] = linear_col.r * energy; + light_data.color[1] = linear_col.g * energy; + light_data.color[2] = linear_col.b * energy; + light_data.specular_amount = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR) * 2.0; + + float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE)); + light_data.inv_radius = 1.0 / radius; + + Vector3 pos = inverse_transform.xform(light_transform.origin); + + light_data.position[0] = pos.x; + light_data.position[1] = pos.y; + light_data.position[2] = pos.z; + + Vector3 direction = inverse_transform.basis.xform(light_transform.basis.xform(Vector3(0, 0, -1))).normalized(); + + light_data.direction[0] = direction.x; + light_data.direction[1] = direction.y; + light_data.direction[2] = direction.z; + + float size = storage->light_get_param(base, RS::LIGHT_PARAM_SIZE); + + light_data.size = size; + + light_data.inv_spot_attenuation = 1.0f / storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION); + float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE); + light_data.cos_spot_angle = Math::cos(Math::deg2rad(spot_angle)); + + light_data.mask = storage->light_get_cull_mask(base); + + light_data.atlas_rect[0] = 0; + light_data.atlas_rect[1] = 0; + light_data.atlas_rect[2] = 0; + light_data.atlas_rect[3] = 0; + + RID projector = storage->light_get_projector(base); + + if (projector.is_valid()) { + Rect2 rect = storage->decal_atlas_get_texture_rect(projector); + + if (type == RS::LIGHT_SPOT) { + light_data.projector_rect[0] = rect.position.x; + light_data.projector_rect[1] = rect.position.y + rect.size.height; //flip because shadow is flipped + light_data.projector_rect[2] = rect.size.width; + light_data.projector_rect[3] = -rect.size.height; + } else { + light_data.projector_rect[0] = rect.position.x; + light_data.projector_rect[1] = rect.position.y; + light_data.projector_rect[2] = rect.size.width; + light_data.projector_rect[3] = rect.size.height * 0.5; //used by dp, so needs to be half + } + } else { + light_data.projector_rect[0] = 0; + light_data.projector_rect[1] = 0; + light_data.projector_rect[2] = 0; + light_data.projector_rect[3] = 0; + } + + if (shadow_atlas && shadow_atlas->shadow_owners.has(li->self)) { + // fill in the shadow information + + light_data.shadow_enabled = true; + + if (type == RS::LIGHT_SPOT) { + light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0); + float shadow_texel_size = Math::tan(Math::deg2rad(spot_angle)) * radius * 2.0; + shadow_texel_size *= light_instance_get_shadow_texel_size(li->self, p_shadow_atlas); + + light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size; + + } else { //omni + light_data.shadow_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0; + float shadow_texel_size = light_instance_get_shadow_texel_size(li->self, p_shadow_atlas); + light_data.shadow_normal_bias = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS) * shadow_texel_size * 2.0; // applied in -1 .. 1 space + } + + light_data.transmittance_bias = storage->light_get_transmittance_bias(base); + + Rect2 rect = light_instance_get_shadow_atlas_rect(li->self, p_shadow_atlas); + + light_data.atlas_rect[0] = rect.position.x; + light_data.atlas_rect[1] = rect.position.y; + light_data.atlas_rect[2] = rect.size.width; + light_data.atlas_rect[3] = rect.size.height; + + light_data.soft_shadow_scale = storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BLUR); + light_data.shadow_volumetric_fog_fade = 1.0 / storage->light_get_shadow_volumetric_fog_fade(base); + + if (type == RS::LIGHT_OMNI) { + light_data.atlas_rect[3] *= 0.5; //one paraboloid on top of another + Transform proj = (inverse_transform * light_transform).inverse(); + + RendererStorageRD::store_transform(proj, light_data.shadow_matrix); + + if (size > 0.0) { + light_data.soft_shadow_size = size; + } else { + light_data.soft_shadow_size = 0.0; + light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF + } + + } else if (type == RS::LIGHT_SPOT) { + Transform modelview = (inverse_transform * light_transform).inverse(); + CameraMatrix bias; + bias.set_light_bias(); + + CameraMatrix shadow_mtx = bias * li->shadow_transform[0].camera * modelview; + RendererStorageRD::store_camera(shadow_mtx, light_data.shadow_matrix); + + if (size > 0.0) { + CameraMatrix cm = li->shadow_transform[0].camera; + float half_np = cm.get_z_near() * Math::tan(Math::deg2rad(spot_angle)); + light_data.soft_shadow_size = (size * 0.5 / radius) / (half_np / cm.get_z_near()) * rect.size.width; + } else { + light_data.soft_shadow_size = 0.0; + light_data.soft_shadow_scale *= shadows_quality_radius_get(); // Only use quality radius for PCF + } + } + } else { + light_data.shadow_enabled = false; + } + + li->light_index = index; + + current_cluster_builder->add_light(type == RS::LIGHT_SPOT ? ClusterBuilderRD::LIGHT_TYPE_SPOT : ClusterBuilderRD::LIGHT_TYPE_OMNI, light_transform, radius, spot_angle); + + r_positional_light_count++; + } + + //update without barriers + if (cluster.omni_light_count) { + RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + + if (cluster.spot_light_count) { + RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } + + if (r_directional_light_count) { + RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } +} + +void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform) { + Transform uv_xform; + uv_xform.basis.scale(Vector3(2.0, 1.0, 2.0)); + uv_xform.origin = Vector3(-1.0, 0.0, -1.0); + + uint32_t decal_count = p_decals.size(); + + cluster.decal_count = 0; + + for (uint32_t i = 0; i < decal_count; i++) { + if (cluster.decal_count == cluster.max_decals) { + break; + } + + DecalInstance *di = decal_instance_owner.getornull(p_decals[i]); + if (!di) { + continue; + } + RID decal = di->decal; + + Transform xform = di->transform; + + real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; + + if (storage->decal_is_distance_fade_enabled(decal)) { + float fade_begin = storage->decal_get_distance_fade_begin(decal); + float fade_length = storage->decal_get_distance_fade_length(decal); + + if (distance > fade_begin) { + if (distance > fade_begin + fade_length) { + continue; // do not use this decal, its invisible + } + } + } + + cluster.decal_sort[cluster.decal_count].instance = di; + cluster.decal_sort[cluster.decal_count].depth = distance; + cluster.decal_count++; + } + + if (cluster.decal_count > 0) { + SortArray<Cluster::InstanceSort<DecalInstance>> sort_array; + sort_array.sort(cluster.decal_sort, cluster.decal_count); + } + + for (uint32_t i = 0; i < cluster.decal_count; i++) { + DecalInstance *di = cluster.decal_sort[i].instance; + RID decal = di->decal; + + Transform xform = di->transform; + float fade = 1.0; + + if (storage->decal_is_distance_fade_enabled(decal)) { + real_t distance = -p_camera_inverse_xform.xform(xform.origin).z; + float fade_begin = storage->decal_get_distance_fade_begin(decal); + float fade_length = storage->decal_get_distance_fade_length(decal); + + if (distance > fade_begin) { + fade = 1.0 - (distance - fade_begin) / fade_length; + } + } + + Cluster::DecalData &dd = cluster.decals[i]; + + Vector3 decal_extents = storage->decal_get_extents(decal); + + Transform scale_xform; + scale_xform.basis.scale(Vector3(decal_extents.x, decal_extents.y, decal_extents.z)); + Transform to_decal_xform = (p_camera_inverse_xform * di->transform * scale_xform * uv_xform).affine_inverse(); + RendererStorageRD::store_transform(to_decal_xform, dd.xform); + + Vector3 normal = xform.basis.get_axis(Vector3::AXIS_Y).normalized(); + normal = p_camera_inverse_xform.basis.xform(normal); //camera is normalized, so fine + + dd.normal[0] = normal.x; + dd.normal[1] = normal.y; + dd.normal[2] = normal.z; + dd.normal_fade = storage->decal_get_normal_fade(decal); + + RID albedo_tex = storage->decal_get_texture(decal, RS::DECAL_TEXTURE_ALBEDO); + RID emission_tex = storage->decal_get_texture(decal, RS::DECAL_TEXTURE_EMISSION); + if (albedo_tex.is_valid()) { + Rect2 rect = storage->decal_atlas_get_texture_rect(albedo_tex); + dd.albedo_rect[0] = rect.position.x; + dd.albedo_rect[1] = rect.position.y; + dd.albedo_rect[2] = rect.size.x; + dd.albedo_rect[3] = rect.size.y; + } else { + if (!emission_tex.is_valid()) { + continue; //no albedo, no emission, no decal. + } + dd.albedo_rect[0] = 0; + dd.albedo_rect[1] = 0; + dd.albedo_rect[2] = 0; + dd.albedo_rect[3] = 0; + } + + RID normal_tex = storage->decal_get_texture(decal, RS::DECAL_TEXTURE_NORMAL); + + if (normal_tex.is_valid()) { + Rect2 rect = storage->decal_atlas_get_texture_rect(normal_tex); + dd.normal_rect[0] = rect.position.x; + dd.normal_rect[1] = rect.position.y; + dd.normal_rect[2] = rect.size.x; + dd.normal_rect[3] = rect.size.y; + + Basis normal_xform = p_camera_inverse_xform.basis * xform.basis.orthonormalized(); + RendererStorageRD::store_basis_3x4(normal_xform, dd.normal_xform); + } else { + dd.normal_rect[0] = 0; + dd.normal_rect[1] = 0; + dd.normal_rect[2] = 0; + dd.normal_rect[3] = 0; + } + + RID orm_tex = storage->decal_get_texture(decal, RS::DECAL_TEXTURE_ORM); + if (orm_tex.is_valid()) { + Rect2 rect = storage->decal_atlas_get_texture_rect(orm_tex); + dd.orm_rect[0] = rect.position.x; + dd.orm_rect[1] = rect.position.y; + dd.orm_rect[2] = rect.size.x; + dd.orm_rect[3] = rect.size.y; + } else { + dd.orm_rect[0] = 0; + dd.orm_rect[1] = 0; + dd.orm_rect[2] = 0; + dd.orm_rect[3] = 0; + } + + if (emission_tex.is_valid()) { + Rect2 rect = storage->decal_atlas_get_texture_rect(emission_tex); + dd.emission_rect[0] = rect.position.x; + dd.emission_rect[1] = rect.position.y; + dd.emission_rect[2] = rect.size.x; + dd.emission_rect[3] = rect.size.y; + } else { + dd.emission_rect[0] = 0; + dd.emission_rect[1] = 0; + dd.emission_rect[2] = 0; + dd.emission_rect[3] = 0; + } + + Color modulate = storage->decal_get_modulate(decal); + dd.modulate[0] = modulate.r; + dd.modulate[1] = modulate.g; + dd.modulate[2] = modulate.b; + dd.modulate[3] = modulate.a * fade; + dd.emission_energy = storage->decal_get_emission_energy(decal) * fade; + dd.albedo_mix = storage->decal_get_albedo_mix(decal); + dd.mask = storage->decal_get_cull_mask(decal); + dd.upper_fade = storage->decal_get_upper_fade(decal); + dd.lower_fade = storage->decal_get_lower_fade(decal); + + current_cluster_builder->add_box(ClusterBuilderRD::BOX_TYPE_DECAL, xform, decal_extents); + } + + if (cluster.decal_count > 0) { + RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + } +} + +void RendererSceneRenderRD::_volumetric_fog_erase(RenderBuffers *rb) { + ERR_FAIL_COND(!rb->volumetric_fog); + + RD::get_singleton()->free(rb->volumetric_fog->prev_light_density_map); + RD::get_singleton()->free(rb->volumetric_fog->light_density_map); + RD::get_singleton()->free(rb->volumetric_fog->fog_map); + + if (rb->volumetric_fog->uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { + RD::get_singleton()->free(rb->volumetric_fog->uniform_set); + } + if (rb->volumetric_fog->uniform_set2.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set2)) { + RD::get_singleton()->free(rb->volumetric_fog->uniform_set2); + } + if (rb->volumetric_fog->sdfgi_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->sdfgi_uniform_set)) { + RD::get_singleton()->free(rb->volumetric_fog->sdfgi_uniform_set); + } + if (rb->volumetric_fog->sky_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->sky_uniform_set)) { + RD::get_singleton()->free(rb->volumetric_fog->sky_uniform_set); + } + + memdelete(rb->volumetric_fog); + + rb->volumetric_fog = nullptr; +} + +void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_gi_probe_count) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + Environment *env = environment_owner.getornull(p_environment); + + float ratio = float(rb->width) / float((rb->width + rb->height) / 2); + uint32_t target_width = uint32_t(float(volumetric_fog_size) * ratio); + uint32_t target_height = uint32_t(float(volumetric_fog_size) / ratio); + + if (rb->volumetric_fog) { + //validate + if (!env || !env->volumetric_fog_enabled || rb->volumetric_fog->width != target_width || rb->volumetric_fog->height != target_height || rb->volumetric_fog->depth != volumetric_fog_depth) { + _volumetric_fog_erase(rb); + _render_buffers_uniform_set_changed(p_render_buffers); + } + } + + if (!env || !env->volumetric_fog_enabled) { + //no reason to enable or update, bye + return; + } + + RENDER_TIMESTAMP(">Volumetric Fog"); + + if (env && env->volumetric_fog_enabled && !rb->volumetric_fog) { + //required volumetric fog but not existing, create + rb->volumetric_fog = memnew(VolumetricFog); + rb->volumetric_fog->width = target_width; + rb->volumetric_fog->height = target_height; + rb->volumetric_fog->depth = volumetric_fog_depth; + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = target_width; + tf.height = target_height; + tf.depth = volumetric_fog_depth; + tf.texture_type = RD::TEXTURE_TYPE_3D; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + + rb->volumetric_fog->light_density_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + + rb->volumetric_fog->prev_light_density_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->texture_clear(rb->volumetric_fog->prev_light_density_map, Color(0, 0, 0, 0), 0, 1, 0, 1); + + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + + rb->volumetric_fog->fog_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); + _render_buffers_uniform_set_changed(p_render_buffers); + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.ids.push_back(rb->volumetric_fog->fog_map); + uniforms.push_back(u); + } + + rb->volumetric_fog->sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sky_shader.default_shader_rd, SKY_SET_FOG); + } + + //update volumetric fog + + if (rb->volumetric_fog->uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { + //re create uniform set if needed + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1; + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + if (shadow_atlas == nullptr || shadow_atlas->depth.is_null()) { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK)); + } else { + u.ids.push_back(shadow_atlas->depth); + } + + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 2; + if (directional_shadow.depth.is_valid()) { + u.ids.push_back(directional_shadow.depth); + } else { + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK)); + } + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.ids.push_back(get_omni_light_buffer()); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 4; + u.ids.push_back(get_spot_light_buffer()); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 5; + u.ids.push_back(get_directional_light_buffer()); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 6; + u.ids.push_back(rb->cluster_builder->get_cluster_buffer()); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 7; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 8; + u.ids.push_back(rb->volumetric_fog->light_density_map); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 9; + u.ids.push_back(rb->volumetric_fog->fog_map); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 10; + u.ids.push_back(shadow_sampler); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 11; + u.ids.push_back(render_buffers_get_gi_probe_buffer(p_render_buffers)); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 12; + for (int i = 0; i < RenderBuffers::MAX_GIPROBES; i++) { + u.ids.push_back(rb->giprobe_textures[i]); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 13; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 14; + u.ids.push_back(volumetric_fog.params_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 15; + u.ids.push_back(rb->volumetric_fog->prev_light_density_map); + uniforms.push_back(u); + } + + rb->volumetric_fog->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, 0), 0); + + SWAP(uniforms.write[7].ids.write[0], uniforms.write[8].ids.write[0]); + + rb->volumetric_fog->uniform_set2 = RD::get_singleton()->uniform_set_create(uniforms, volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, 0), 0); + } + + bool using_sdfgi = env->volumetric_fog_gi_inject > 0.0001 && env->sdfgi_enabled && (rb->sdfgi != nullptr); + + if (using_sdfgi) { + if (rb->volumetric_fog->sdfgi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->sdfgi_uniform_set)) { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 0; + u.ids.push_back(gi.sdfgi_ubo); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1; + u.ids.push_back(rb->sdfgi->ambient_texture); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 2; + u.ids.push_back(rb->sdfgi->occlusion_texture); + uniforms.push_back(u); + } + + rb->volumetric_fog->sdfgi_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, VOLUMETRIC_FOG_SHADER_DENSITY_WITH_SDFGI), 1); + } + } + + rb->volumetric_fog->length = env->volumetric_fog_length; + rb->volumetric_fog->spread = env->volumetric_fog_detail_spread; + + VolumetricFogShader::ParamsUBO params; + + Vector2 frustum_near_size = p_cam_projection.get_viewport_half_extents(); + Vector2 frustum_far_size = p_cam_projection.get_far_plane_half_extents(); + float z_near = p_cam_projection.get_z_near(); + float z_far = p_cam_projection.get_z_far(); + float fog_end = env->volumetric_fog_length; + + Vector2 fog_far_size = frustum_near_size.lerp(frustum_far_size, (fog_end - z_near) / (z_far - z_near)); + Vector2 fog_near_size; + if (p_cam_projection.is_orthogonal()) { + fog_near_size = fog_far_size; + } else { + fog_near_size = Vector2(); + } + + params.fog_frustum_size_begin[0] = fog_near_size.x; + params.fog_frustum_size_begin[1] = fog_near_size.y; + + params.fog_frustum_size_end[0] = fog_far_size.x; + params.fog_frustum_size_end[1] = fog_far_size.y; + + params.z_near = z_near; + params.z_far = z_far; + + params.fog_frustum_end = fog_end; + + params.fog_volume_size[0] = rb->volumetric_fog->width; + params.fog_volume_size[1] = rb->volumetric_fog->height; + params.fog_volume_size[2] = rb->volumetric_fog->depth; + + params.directional_light_count = p_directional_light_count; + + Color light = env->volumetric_fog_light.to_linear(); + params.light_energy[0] = light.r * env->volumetric_fog_light_energy; + params.light_energy[1] = light.g * env->volumetric_fog_light_energy; + params.light_energy[2] = light.b * env->volumetric_fog_light_energy; + params.base_density = env->volumetric_fog_density; + + params.detail_spread = env->volumetric_fog_detail_spread; + params.gi_inject = env->volumetric_fog_gi_inject; + + params.cam_rotation[0] = p_cam_transform.basis[0][0]; + params.cam_rotation[1] = p_cam_transform.basis[1][0]; + params.cam_rotation[2] = p_cam_transform.basis[2][0]; + params.cam_rotation[3] = 0; + params.cam_rotation[4] = p_cam_transform.basis[0][1]; + params.cam_rotation[5] = p_cam_transform.basis[1][1]; + params.cam_rotation[6] = p_cam_transform.basis[2][1]; + params.cam_rotation[7] = 0; + params.cam_rotation[8] = p_cam_transform.basis[0][2]; + params.cam_rotation[9] = p_cam_transform.basis[1][2]; + params.cam_rotation[10] = p_cam_transform.basis[2][2]; + params.cam_rotation[11] = 0; + params.filter_axis = 0; + params.max_gi_probes = env->volumetric_fog_gi_inject > 0.001 ? p_gi_probe_count : 0; + params.temporal_frame = RSG::rasterizer->get_frame_number() % VolumetricFog::MAX_TEMPORAL_FRAMES; + + Transform to_prev_cam_view = rb->volumetric_fog->prev_cam_transform.affine_inverse() * p_cam_transform; + storage->store_transform(to_prev_cam_view, params.to_prev_view); + + params.use_temporal_reprojection = env->volumetric_fog_temporal_reprojection; + params.temporal_blend = env->volumetric_fog_temporal_reprojection_amount; + + { + uint32_t cluster_size = rb->cluster_builder->get_cluster_size(); + params.cluster_shift = get_shift_from_power_of_2(cluster_size); + + uint32_t cluster_screen_width = (rb->width - 1) / cluster_size + 1; + uint32_t cluster_screen_height = (rb->height - 1) / cluster_size + 1; + params.cluster_type_size = cluster_screen_width * cluster_screen_height * (32 + 32); + params.cluster_width = cluster_screen_width; + params.max_cluster_element_count_div_32 = max_cluster_elements / 32; + + params.screen_size[0] = rb->width; + params.screen_size[1] = rb->height; + } + + /* Vector2 dssize = directional_shadow_get_size(); + push_constant.directional_shadow_pixel_size[0] = 1.0 / dssize.x; + push_constant.directional_shadow_pixel_size[1] = 1.0 / dssize.y; +*/ + + RD::get_singleton()->draw_command_begin_label("Render Volumetric Fog"); + + RENDER_TIMESTAMP("Render Fog"); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), ¶ms, RD::BARRIER_MASK_COMPUTE); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + bool use_filter = volumetric_fog_filter_active; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[using_sdfgi ? VOLUMETRIC_FOG_SHADER_DENSITY_WITH_SDFGI : VOLUMETRIC_FOG_SHADER_DENSITY]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); + + if (using_sdfgi) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); + } + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); + + RD::get_singleton()->draw_command_end_label(); + + RD::get_singleton()->compute_list_end(); + + RD::get_singleton()->texture_copy(rb->volumetric_fog->light_density_map, rb->volumetric_fog->prev_light_density_map, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth), 0, 0, 0, 0); + + compute_list = RD::get_singleton()->compute_list_begin(); + + if (use_filter) { + RD::get_singleton()->draw_command_begin_label("Filter Fog"); + + RENDER_TIMESTAMP("Filter Fog"); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); + + RD::get_singleton()->compute_list_end(); + //need restart for buffer update + + params.filter_axis = 1; + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), ¶ms); + + compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set2, 0); + if (using_sdfgi) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); + } + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); + } + + RENDER_TIMESTAMP("Integrate Fog"); + RD::get_singleton()->draw_command_begin_label("Integrate Fog"); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FOG]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1); + + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER); + + RENDER_TIMESTAMP("<Volumetric Fog"); + RD::get_singleton()->draw_command_end_label(); + + rb->volumetric_fog->prev_cam_transform = p_cam_transform; +} + +uint32_t RendererSceneRenderRD::_get_render_state_directional_light_count() const { + return render_state.directional_light_count; +} + +bool RendererSceneRenderRD::_needs_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + if (rb->sdfgi != nullptr) { + return true; + } + } + return false; +} + +void RendererSceneRenderRD::_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + _sdfgi_update_probes(render_state.render_buffers, render_state.environment); + } + } +} + +void RendererSceneRenderRD::_pre_resolve_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + RD::get_singleton()->compute_list_end(); + } + } +} + +void RendererSceneRenderRD::_pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer) { + // Render shadows while GI is rendering, due to how barriers are handled, this should happen at the same time + + if (render_state.render_buffers.is_valid() && p_use_gi) { + _sdfgi_store_probes(render_state.render_buffers); + } + + render_state.cube_shadows.clear(); + render_state.shadows.clear(); + render_state.directional_shadows.clear(); + + Plane camera_plane(render_state.cam_transform.origin, -render_state.cam_transform.basis.get_axis(Vector3::AXIS_Z)); + float lod_distance_multiplier = render_state.cam_projection.get_lod_multiplier(); + + { + for (int i = 0; i < render_state.render_shadow_count; i++) { + LightInstance *li = light_instance_owner.getornull(render_state.render_shadows[i].light); + + if (storage->light_get_type(li->light) == RS::LIGHT_DIRECTIONAL) { + render_state.directional_shadows.push_back(i); + } else if (storage->light_get_type(li->light) == RS::LIGHT_OMNI && storage->light_omni_get_shadow_mode(li->light) == RS::LIGHT_OMNI_SHADOW_CUBE) { + render_state.cube_shadows.push_back(i); + } else { + render_state.shadows.push_back(i); + } + } + + //cube shadows are rendered in their own way + for (uint32_t i = 0; i < render_state.cube_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.cube_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.cube_shadows[i]].pass, render_state.render_shadows[render_state.cube_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, true, true, true); + } + + if (render_state.directional_shadows.size()) { + //open the pass for directional shadows + _update_directional_shadow_atlas(); + RD::get_singleton()->draw_list_begin(directional_shadow.fb, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_end(); + } + } + + // Render GI + + bool render_shadows = render_state.directional_shadows.size() || render_state.shadows.size(); + bool render_gi = render_state.render_buffers.is_valid() && p_use_gi; + + if (render_shadows && render_gi) { + RENDER_TIMESTAMP("Render GI + Render Shadows (parallel)"); + } else if (render_shadows) { + RENDER_TIMESTAMP("Render Shadows"); + } else if (render_gi) { + RENDER_TIMESTAMP("Render GI"); + } + + //prepare shadow rendering + if (render_shadows) { + _render_shadow_begin(); + + //render directional shadows + for (uint32_t i = 0; i < render_state.directional_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.directional_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.directional_shadows[i]].pass, render_state.render_shadows[render_state.directional_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, false, i == render_state.directional_shadows.size() - 1, false); + } + //render positional shadows + for (uint32_t i = 0; i < render_state.shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.shadows[i]].pass, render_state.render_shadows[render_state.shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, i == 0, i == render_state.shadows.size() - 1, true); + } + + _render_shadow_process(); + } + + //start GI + if (render_gi) { + _process_gi(render_state.render_buffers, p_normal_roughness_buffer, p_gi_probe_buffer, render_state.environment, render_state.cam_projection, render_state.cam_transform, *render_state.gi_probes); + } + + //Do shadow rendering (in parallel with GI) + if (render_shadows) { + _render_shadow_end(RD::BARRIER_MASK_NO_BARRIER); + } + + if (render_gi) { + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //use a later barrier + } + + if (render_state.render_buffers.is_valid()) { + if (p_use_ssao) { + _process_ssao(render_state.render_buffers, render_state.environment, p_normal_roughness_buffer, render_state.cam_projection); + } + } + + //full barrier here, we need raster, transfer and compute and it depends from the previous work + RD::get_singleton()->barrier(RD::BARRIER_MASK_ALL, RD::BARRIER_MASK_ALL); + + if (current_cluster_builder) { + current_cluster_builder->begin(render_state.cam_transform, render_state.cam_projection, !render_state.reflection_probe.is_valid()); + } + + bool using_shadows = true; + + if (render_state.reflection_probe.is_valid()) { + if (!storage->reflection_probe_renders_shadows(reflection_probe_instance_get_probe(render_state.reflection_probe))) { + using_shadows = false; + } + } else { + //do not render reflections when rendering a reflection probe + _setup_reflections(*render_state.reflection_probes, render_state.cam_transform.affine_inverse(), render_state.environment); + } + + uint32_t directional_light_count = 0; + uint32_t positional_light_count = 0; + _setup_lights(*render_state.lights, render_state.cam_transform, render_state.shadow_atlas, using_shadows, directional_light_count, positional_light_count); + _setup_decals(*render_state.decals, render_state.cam_transform.affine_inverse()); + + render_state.directional_light_count = directional_light_count; + + if (current_cluster_builder) { + current_cluster_builder->bake_cluster(); + } + + if (render_state.render_buffers.is_valid()) { + bool directional_shadows = false; + for (uint32_t i = 0; i < directional_light_count; i++) { + if (cluster.directional_lights[i].shadow_enabled) { + directional_shadows = true; + break; + } + } + _update_volumetric_fog(render_state.render_buffers, render_state.environment, render_state.cam_projection, render_state.cam_transform, render_state.shadow_atlas, directional_light_count, directional_shadows, positional_light_count, render_state.gi_probe_count); + } +} + +void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data) { + //assign render data + { + render_state.render_buffers = p_render_buffers; + render_state.cam_transform = p_cam_transform; + render_state.cam_projection = p_cam_projection; + render_state.cam_ortogonal = p_cam_projection.is_orthogonal(); + render_state.instances = &p_instances; + render_state.lights = &p_lights; + render_state.reflection_probes = &p_reflection_probes; + render_state.gi_probes = &p_gi_probes; + render_state.decals = &p_decals; + render_state.lightmaps = &p_lightmaps; + render_state.environment = p_environment; + render_state.camera_effects = p_camera_effects; + render_state.shadow_atlas = p_shadow_atlas; + render_state.reflection_atlas = p_reflection_atlas; + render_state.reflection_probe = p_reflection_probe; + render_state.reflection_probe_pass = p_reflection_probe_pass; + render_state.screen_lod_threshold = p_screen_lod_threshold; + + render_state.render_shadows = p_render_shadows; + render_state.render_shadow_count = p_render_shadow_count; + render_state.render_sdfgi_regions = p_render_sdfgi_regions; + render_state.render_sdfgi_region_count = p_render_sdfgi_region_count; + render_state.sdfgi_update_data = p_sdfgi_update_data; + } + + PagedArray<RID> empty; + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { + render_state.lights = ∅ + render_state.reflection_probes = ∅ + render_state.gi_probes = ∅ + } + + //sdfgi first + if (p_render_buffers.is_valid()) { + for (int i = 0; i < render_state.render_sdfgi_region_count; i++) { + _render_sdfgi_region(p_render_buffers, render_state.render_sdfgi_regions[i].region, render_state.render_sdfgi_regions[i].instances); + } + if (render_state.sdfgi_update_data->update_static) { + _render_sdfgi_static_lights(p_render_buffers, render_state.sdfgi_update_data->static_cascade_count, p_sdfgi_update_data->static_cascade_indices, render_state.sdfgi_update_data->static_positional_lights); + } + } + + Color clear_color; + if (p_render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + clear_color = storage->render_target_get_clear_request_color(rb->render_target); + } else { + clear_color = storage->get_default_clear_color(); + } + + //assign render indices to giprobes + for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { + GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probes[i]); + if (giprobe_inst) { + giprobe_inst->render_index = i; + } + } + + if (render_buffers_owner.owns(render_state.render_buffers)) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + current_cluster_builder = rb->cluster_builder; + } else if (reflection_probe_instance_owner.owns(render_state.reflection_probe)) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(render_state.reflection_probe); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas); + if (!ra) { + ERR_PRINT("reflection probe has no reflection atlas! Bug?"); + current_cluster_builder = nullptr; + } else { + current_cluster_builder = ra->cluster_builder; + } + } else { + ERR_PRINT("No cluster builder, bug"); //should never happen, will crash + current_cluster_builder = nullptr; + } + + if (p_render_buffers.is_valid()) { + _pre_process_gi(p_render_buffers, p_cam_transform); + } + + render_state.gi_probe_count = 0; + if (render_state.render_buffers.is_valid()) { + _setup_giprobes(render_state.render_buffers, render_state.cam_transform, *render_state.gi_probes, render_state.gi_probe_count); + _sdfgi_update_light(render_state.render_buffers, render_state.environment); + } + + render_state.depth_prepass_used = false; + //calls _pre_opaque_render between depth pre-pass and opaque pass + _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, *render_state.gi_probes, p_lightmaps, p_environment, current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); + + if (p_render_buffers.is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES) { + ClusterBuilderRD::ElementType elem_type = ClusterBuilderRD::ELEMENT_TYPE_MAX; + switch (debug_draw) { + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_OMNI_LIGHT; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_SPOT_LIGHT; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_DECAL; + break; + case RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES: + elem_type = ClusterBuilderRD::ELEMENT_TYPE_REFLECTION_PROBE; + break; + default: { + } + } + current_cluster_builder->debug(elem_type); + } + + RENDER_TIMESTAMP("Tonemap"); + + _render_buffers_post_process_and_tonemap(p_render_buffers, p_environment, p_camera_effects, p_cam_projection); + _render_buffers_debug_draw(p_render_buffers, p_shadow_atlas); + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SDFGI) { + _sdfgi_debug_draw(p_render_buffers, p_cam_projection, p_cam_transform); + } + } +} + +void RendererSceneRenderRD::_render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_open_pass, bool p_close_pass, bool p_clear_region) { + LightInstance *light_instance = light_instance_owner.getornull(p_light); + ERR_FAIL_COND(!light_instance); + + Rect2i atlas_rect; + uint32_t atlas_size; + RID atlas_fb; + + bool using_dual_paraboloid = false; + bool using_dual_paraboloid_flip = false; + RID render_fb; + RID render_texture; + float zfar; + + bool use_pancake = false; + bool render_cubemap = false; + bool finalize_cubemap = false; + + bool flip_y = false; + + CameraMatrix light_projection; + Transform light_transform; + + if (storage->light_get_type(light_instance->light) == RS::LIGHT_DIRECTIONAL) { + //set pssm stuff + if (light_instance->last_scene_shadow_pass != scene_pass) { + light_instance->directional_rect = _get_directional_shadow_rect(directional_shadow.size, directional_shadow.light_count, directional_shadow.current_light); + directional_shadow.current_light++; + light_instance->last_scene_shadow_pass = scene_pass; + } + + use_pancake = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_SHADOW_PANCAKE_SIZE) > 0; + light_projection = light_instance->shadow_transform[p_pass].camera; + light_transform = light_instance->shadow_transform[p_pass].transform; + + atlas_rect.position.x = light_instance->directional_rect.position.x; + atlas_rect.position.y = light_instance->directional_rect.position.y; + atlas_rect.size.width = light_instance->directional_rect.size.x; + atlas_rect.size.height = light_instance->directional_rect.size.y; + + if (storage->light_directional_get_shadow_mode(light_instance->light) == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_4_SPLITS) { + atlas_rect.size.width /= 2; + atlas_rect.size.height /= 2; + + if (p_pass == 1) { + atlas_rect.position.x += atlas_rect.size.width; + } else if (p_pass == 2) { + atlas_rect.position.y += atlas_rect.size.height; + } else if (p_pass == 3) { + atlas_rect.position.x += atlas_rect.size.width; + atlas_rect.position.y += atlas_rect.size.height; + } + } else if (storage->light_directional_get_shadow_mode(light_instance->light) == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS) { + atlas_rect.size.height /= 2; + + if (p_pass == 0) { + } else { + atlas_rect.position.y += atlas_rect.size.height; + } + } + + light_instance->shadow_transform[p_pass].atlas_rect = atlas_rect; + + light_instance->shadow_transform[p_pass].atlas_rect.position /= directional_shadow.size; + light_instance->shadow_transform[p_pass].atlas_rect.size /= directional_shadow.size; + + zfar = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_RANGE); + + render_fb = directional_shadow.fb; + render_texture = RID(); + flip_y = true; + + } else { + //set from shadow atlas + + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + ERR_FAIL_COND(!shadow_atlas); + ERR_FAIL_COND(!shadow_atlas->shadow_owners.has(p_light)); + + _update_shadow_atlas(shadow_atlas); + + uint32_t key = shadow_atlas->shadow_owners[p_light]; + + uint32_t quadrant = (key >> ShadowAtlas::QUADRANT_SHIFT) & 0x3; + uint32_t shadow = key & ShadowAtlas::SHADOW_INDEX_MASK; + + ERR_FAIL_INDEX((int)shadow, shadow_atlas->quadrants[quadrant].shadows.size()); + + uint32_t quadrant_size = shadow_atlas->size >> 1; + + atlas_rect.position.x = (quadrant & 1) * quadrant_size; + atlas_rect.position.y = (quadrant >> 1) * quadrant_size; + + uint32_t shadow_size = (quadrant_size / shadow_atlas->quadrants[quadrant].subdivision); + atlas_rect.position.x += (shadow % shadow_atlas->quadrants[quadrant].subdivision) * shadow_size; + atlas_rect.position.y += (shadow / shadow_atlas->quadrants[quadrant].subdivision) * shadow_size; + + atlas_rect.size.width = shadow_size; + atlas_rect.size.height = shadow_size; + + zfar = storage->light_get_param(light_instance->light, RS::LIGHT_PARAM_RANGE); + + if (storage->light_get_type(light_instance->light) == RS::LIGHT_OMNI) { + if (storage->light_omni_get_shadow_mode(light_instance->light) == RS::LIGHT_OMNI_SHADOW_CUBE) { + ShadowCubemap *cubemap = _get_shadow_cubemap(shadow_size / 2); + + render_fb = cubemap->side_fb[p_pass]; + render_texture = cubemap->cubemap; + + light_projection = light_instance->shadow_transform[p_pass].camera; + light_transform = light_instance->shadow_transform[p_pass].transform; + render_cubemap = true; + finalize_cubemap = p_pass == 5; + atlas_fb = shadow_atlas->fb; + + atlas_size = shadow_atlas->size; + + if (p_pass == 0) { + _render_shadow_begin(); + } + + } else { + light_projection = light_instance->shadow_transform[0].camera; + light_transform = light_instance->shadow_transform[0].transform; + + atlas_rect.size.height /= 2; + atlas_rect.position.y += p_pass * atlas_rect.size.height; + + using_dual_paraboloid = true; + using_dual_paraboloid_flip = p_pass == 1; + render_fb = shadow_atlas->fb; + flip_y = true; + } + + } else if (storage->light_get_type(light_instance->light) == RS::LIGHT_SPOT) { + light_projection = light_instance->shadow_transform[0].camera; + light_transform = light_instance->shadow_transform[0].transform; + + render_fb = shadow_atlas->fb; + + flip_y = true; + } + } + + if (render_cubemap) { + //rendering to cubemap + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, Rect2(), false, true, true, true); + if (finalize_cubemap) { + _render_shadow_process(); + _render_shadow_end(); + //reblit + Rect2 atlas_rect_norm = atlas_rect; + atlas_rect_norm.position.x /= float(atlas_size); + atlas_rect_norm.position.y /= float(atlas_size); + atlas_rect_norm.size.x /= float(atlas_size); + atlas_rect_norm.size.y /= float(atlas_size); + atlas_rect_norm.size.height /= 2; + storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), false); + atlas_rect_norm.position.y += atlas_rect_norm.size.height; + storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), true); + + //restore transform so it can be properly used + light_instance_set_shadow_transform(p_light, CameraMatrix(), light_instance->transform, zfar, 0, 0, 0); + } + + } else { + //render shadow + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, atlas_rect, flip_y, p_clear_region, p_open_pass, p_close_pass); + } +} + +void RendererSceneRenderRD::render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { + _render_material(p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, p_framebuffer, p_region); +} + +void RendererSceneRenderRD::_render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) { + //print_line("rendering region " + itos(p_region)); + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + ERR_FAIL_COND(!rb->sdfgi); + AABB bounds; + Vector3i from; + Vector3i size; + + int cascade_prev = _sdfgi_get_pending_region_data(p_render_buffers, p_region - 1, from, size, bounds); + int cascade_next = _sdfgi_get_pending_region_data(p_render_buffers, p_region + 1, from, size, bounds); + int cascade = _sdfgi_get_pending_region_data(p_render_buffers, p_region, from, size, bounds); + ERR_FAIL_COND(cascade < 0); + + if (cascade_prev != cascade) { + //initialize render + RD::get_singleton()->texture_clear(rb->sdfgi->render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_emission, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1); + } + + //print_line("rendering cascade " + itos(p_region) + " objects: " + itos(p_cull_count) + " bounds: " + bounds + " from: " + from + " size: " + size + " cell size: " + rtos(rb->sdfgi->cascades[cascade].cell_size)); + _render_sdfgi(p_render_buffers, from, size, bounds, p_instances, rb->sdfgi->render_albedo, rb->sdfgi->render_emission, rb->sdfgi->render_emission_aniso, rb->sdfgi->render_geom_facing); + + if (cascade_next != cascade) { + RD::get_singleton()->draw_command_begin_label("SDFGI Pre-Process Cascade"); + + RENDER_TIMESTAMP(">SDFGI Update SDF"); + //done rendering! must update SDF + //clear dispatch indirect data + + SDGIShader::PreprocessPushConstant push_constant; + zeromem(&push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + + RENDER_TIMESTAMP("Scroll SDF"); + + //scroll + if (rb->sdfgi->cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { + //for scroll + Vector3i dirty = rb->sdfgi->cascades[cascade].dirty_regions; + push_constant.scroll[0] = dirty.x; + push_constant.scroll[1] = dirty.y; + push_constant.scroll[2] = dirty.z; + } else { + //for no scroll + push_constant.scroll[0] = 0; + push_constant.scroll[1] = 0; + push_constant.scroll[2] = 0; + } + + rb->sdfgi->cascades[cascade].all_dynamic_lights_dirty = true; + + push_constant.grid_size = rb->sdfgi->cascade_size; + push_constant.cascade = cascade; + + if (rb->sdfgi->cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + //must pre scroll existing data because not all is dirty + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_SCROLL]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].scroll_uniform_set, 0); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0); + // no barrier do all together + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_SCROLL_OCCLUSION]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].scroll_occlusion_uniform_set, 0); + + Vector3i dirty = rb->sdfgi->cascades[cascade].dirty_regions; + Vector3i groups; + groups.x = rb->sdfgi->cascade_size - ABS(dirty.x); + groups.y = rb->sdfgi->cascade_size - ABS(dirty.y); + groups.z = rb->sdfgi->cascade_size - ABS(dirty.z); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z); + + //no barrier, continue together + + { + //scroll probes and their history also + + SDGIShader::IntegratePushConstant ipush_constant; + ipush_constant.grid_size[1] = rb->sdfgi->cascade_size; + ipush_constant.grid_size[2] = rb->sdfgi->cascade_size; + ipush_constant.grid_size[0] = rb->sdfgi->cascade_size; + ipush_constant.max_cascades = rb->sdfgi->cascades.size(); + ipush_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + ipush_constant.history_index = 0; + ipush_constant.history_size = rb->sdfgi->history_size; + ipush_constant.ray_count = 0; + ipush_constant.ray_bias = 0; + ipush_constant.sky_mode = 0; + ipush_constant.sky_energy = 0; + ipush_constant.sky_color[0] = 0; + ipush_constant.sky_color[1] = 0; + ipush_constant.sky_color[2] = 0; + ipush_constant.y_mult = rb->sdfgi->y_mult; + ipush_constant.store_ambient_texture = false; + + ipush_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + ipush_constant.image_size[1] = rb->sdfgi->probe_axis_count; + + int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + ipush_constant.cascade = cascade; + ipush_constant.world_offset[0] = rb->sdfgi->cascades[cascade].position.x / probe_divisor; + ipush_constant.world_offset[1] = rb->sdfgi->cascades[cascade].position.y / probe_divisor; + ipush_constant.world_offset[2] = rb->sdfgi->cascades[cascade].position.z / probe_divisor; + + ipush_constant.scroll[0] = dirty.x / probe_divisor; + ipush_constant.scroll[1] = dirty.y / probe_divisor; + ipush_constant.scroll[2] = dirty.z / probe_divisor; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_SCROLL]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_SCROLL_STORE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + if (rb->sdfgi->bounce_feedback > 0.0) { + //multibounce requires this to be stored so direct light can read from it + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_STORE]); + + //convert to octahedral to store + ipush_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; + ipush_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); + } + } + + //ok finally barrier + RD::get_singleton()->compute_list_end(); + } + + //clear dispatch indirect data + uint32_t dispatch_indirct_data[4] = { 0, 0, 0, 0 }; + RD::get_singleton()->buffer_update(rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + bool half_size = true; //much faster, very little difference + static const int optimized_jf_group_size = 8; + + if (half_size) { + push_constant.grid_size >>= 1; + + uint32_t cascade_half_size = rb->sdfgi->cascade_size >> 1; + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_half_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //must start with regular jumpflood + + push_constant.half_size = true; + { + RENDER_TIMESTAMP("SDFGI Jump Flood (Half Size)"); + + uint32_t s = cascade_half_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD]); + + int jf_us = 0; + //start with regular jump flood for very coarse reads, as this is impossible to optimize + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 1 : 0; + + if (cascade_half_size / (s / 2) >= optimized_jf_group_size) { + break; + } + } + + RENDER_TIMESTAMP("SDFGI Jump Flood Optimized (Half Size)"); + + //continue with optimized jump flood for smaller reads + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 1 : 0; + } + } + + // restore grid size for last passes + push_constant.grid_size = rb->sdfgi->cascade_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_upscale_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //run one pass of fullsize jumpflood to fix up half size arctifacts + + push_constant.half_size = false; + push_constant.step_size = 1; + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[rb->sdfgi->upscale_jfa_uniform_set_index], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + } else { + //full size jumpflood + RENDER_TIMESTAMP("SDFGI Jump Flood"); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + push_constant.half_size = false; + { + uint32_t s = rb->sdfgi->cascade_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD]); + + int jf_us = 0; + //start with regular jump flood for very coarse reads, as this is impossible to optimize + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 1 : 0; + + if (rb->sdfgi->cascade_size / (s / 2) >= optimized_jf_group_size) { + break; + } + } + + RENDER_TIMESTAMP("SDFGI Jump Flood Optimized"); + + //continue with optimized jump flood for smaller reads + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 1 : 0; + } + } + } + + RENDER_TIMESTAMP("SDFGI Occlusion"); + + // occlusion + { + uint32_t probe_size = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + Vector3i probe_global_pos = rb->sdfgi->cascades[cascade].position / probe_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_OCCLUSION]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->occlusion_uniform_set, 0); + for (int i = 0; i < 8; i++) { + //dispatch all at once for performance + Vector3i offset(i & 1, (i >> 1) & 1, (i >> 2) & 1); + + if ((probe_global_pos.x & 1) != 0) { + offset.x = (offset.x + 1) & 1; + } + if ((probe_global_pos.y & 1) != 0) { + offset.y = (offset.y + 1) & 1; + } + if ((probe_global_pos.z & 1) != 0) { + offset.z = (offset.z + 1) & 1; + } + push_constant.probe_offset[0] = offset.x; + push_constant.probe_offset[1] = offset.y; + push_constant.probe_offset[2] = offset.z; + push_constant.occlusion_index = i; + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + + Vector3i groups = Vector3i(probe_size + 1, probe_size + 1, probe_size + 1) - offset; //if offset, it's one less probe per axis to compute + RD::get_singleton()->compute_list_dispatch(compute_list, groups.x, groups.y, groups.z); + } + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + + RENDER_TIMESTAMP("SDFGI Store"); + + // store + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_STORE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].sdf_store_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); + + RD::get_singleton()->compute_list_end(); + + //clear these textures, as they will have previous garbage on next draw + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + +#if 0 + Vector<uint8_t> data = RD::get_singleton()->texture_get_data(rb->sdfgi->cascades[cascade].sdf, 0); + Ref<Image> img; + img.instance(); + for (uint32_t i = 0; i < rb->sdfgi->cascade_size; i++) { + Vector<uint8_t> subarr = data.subarray(128 * 128 * i, 128 * 128 * (i + 1) - 1); + img->create(rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, false, Image::FORMAT_L8, subarr); + img->save_png("res://cascade_sdf_" + itos(cascade) + "_" + itos(i) + ".png"); + } + + //finalize render and update sdf +#endif + +#if 0 + Vector<uint8_t> data = RD::get_singleton()->texture_get_data(rb->sdfgi->render_albedo, 0); + Ref<Image> img; + img.instance(); + for (uint32_t i = 0; i < rb->sdfgi->cascade_size; i++) { + Vector<uint8_t> subarr = data.subarray(128 * 128 * i * 2, 128 * 128 * (i + 1) * 2 - 1); + img->create(rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, false, Image::FORMAT_RGB565, subarr); + img->convert(Image::FORMAT_RGBA8); + img->save_png("res://cascade_" + itos(cascade) + "_" + itos(i) + ".png"); + } + + //finalize render and update sdf +#endif + + RENDER_TIMESTAMP("<SDFGI Update SDF"); + RD::get_singleton()->draw_command_end_label(); + } +} + +void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances) { + ERR_FAIL_COND(!storage->particles_collision_is_heightfield(p_collider)); + Vector3 extents = storage->particles_collision_get_extents(p_collider) * p_transform.basis.get_scale(); + CameraMatrix cm; + cm.set_orthogonal(-extents.x, extents.x, -extents.z, extents.z, 0, extents.y * 2.0); + + Vector3 cam_pos = p_transform.origin; + cam_pos.y += extents.y; + + Transform cam_xform; + cam_xform.set_look_at(cam_pos, cam_pos - p_transform.basis.get_axis(Vector3::AXIS_Y), -p_transform.basis.get_axis(Vector3::AXIS_Z).normalized()); + + RID fb = storage->particles_collision_get_heightfield_framebuffer(p_collider); + + _render_particle_collider_heightfield(fb, cam_xform, cm, p_instances); +} + +void RendererSceneRenderRD::_render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + ERR_FAIL_COND(!rb->sdfgi); + + RD::get_singleton()->draw_command_begin_label("SDFGI Render Static Lighs"); + + _sdfgi_update_cascades(p_render_buffers); //need cascades updated for this + + SDGIShader::Light lights[SDFGI::MAX_STATIC_LIGHTS]; + uint32_t light_count[SDFGI::MAX_STATIC_LIGHTS]; + + for (uint32_t i = 0; i < p_cascade_count; i++) { + ERR_CONTINUE(p_cascade_indices[i] >= rb->sdfgi->cascades.size()); + + SDFGI::Cascade &cc = rb->sdfgi->cascades[p_cascade_indices[i]]; + + { //fill light buffer + + AABB cascade_aabb; + cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cc.position)) * cc.cell_size; + cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cc.cell_size; + + int idx = 0; + + for (uint32_t j = 0; j < (uint32_t)p_positional_light_cull_result[i].size(); j++) { + if (idx == SDFGI::MAX_STATIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(p_positional_light_cull_result[i][j]); + ERR_CONTINUE(!li); + + uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); + if (p_cascade_indices[i] > max_sdfgi_cascade) { + continue; + } + + if (!cascade_aabb.intersects(li->aabb)) { + continue; + } + + lights[idx].type = storage->light_get_type(li->light); + + Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); + if (lights[idx].type == RS::LIGHT_DIRECTIONAL) { + dir.y *= rb->sdfgi->y_mult; //only makes sense for directional + dir.normalize(); + } + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Vector3 pos = li->transform.origin; + pos.y *= rb->sdfgi->y_mult; + lights[idx].position[0] = pos.x; + lights[idx].position[1] = pos.y; + lights[idx].position[2] = pos.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); + lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); + lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); + lights[idx].inv_spot_attenuation = 1.0f / storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + idx++; + } + + if (idx > 0) { + RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights); + } + + light_count[i] = idx; + } + } + + /* Static Lights */ + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_STATIC]); + + SDGIShader::DirectLightPushConstant dl_push_constant; + + dl_push_constant.grid_size[0] = rb->sdfgi->cascade_size; + dl_push_constant.grid_size[1] = rb->sdfgi->cascade_size; + dl_push_constant.grid_size[2] = rb->sdfgi->cascade_size; + dl_push_constant.max_cascades = rb->sdfgi->cascades.size(); + dl_push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + dl_push_constant.bounce_feedback = 0.0; // this is static light, do not multibounce yet + dl_push_constant.y_mult = rb->sdfgi->y_mult; + dl_push_constant.use_occlusion = rb->sdfgi->uses_occlusion; + + //all must be processed + dl_push_constant.process_offset = 0; + dl_push_constant.process_increment = 1; + + for (uint32_t i = 0; i < p_cascade_count; i++) { + ERR_CONTINUE(p_cascade_indices[i] >= rb->sdfgi->cascades.size()); + + SDFGI::Cascade &cc = rb->sdfgi->cascades[p_cascade_indices[i]]; + + dl_push_constant.light_count = light_count[i]; + dl_push_constant.cascade = p_cascade_indices[i]; + + if (dl_push_constant.light_count > 0) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cc.sdf_direct_light_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &dl_push_constant, sizeof(SDGIShader::DirectLightPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cc.solid_cell_dispatch_buffer, 0); + } + } + + RD::get_singleton()->compute_list_end(); + + RD::get_singleton()->draw_command_end_label(); +} + +bool RendererSceneRenderRD::free(RID p_rid) { + if (render_buffers_owner.owns(p_rid)) { + RenderBuffers *rb = render_buffers_owner.getornull(p_rid); + _free_render_buffer_data(rb); + memdelete(rb->data); + if (rb->sdfgi) { + _sdfgi_erase(rb); + } + if (rb->volumetric_fog) { + _volumetric_fog_erase(rb); + } + if (rb->cluster_builder) { + memdelete(rb->cluster_builder); + } + render_buffers_owner.free(p_rid); + } else if (environment_owner.owns(p_rid)) { + //not much to delete, just free it + environment_owner.free(p_rid); + } else if (camera_effects_owner.owns(p_rid)) { + //not much to delete, just free it + camera_effects_owner.free(p_rid); + } else if (reflection_atlas_owner.owns(p_rid)) { + reflection_atlas_set_size(p_rid, 0, 0); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(p_rid); + if (ra->cluster_builder) { + memdelete(ra->cluster_builder); + } + reflection_atlas_owner.free(p_rid); + } else if (reflection_probe_instance_owner.owns(p_rid)) { + //not much to delete, just free it + //ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_rid); + reflection_probe_release_atlas_index(p_rid); + reflection_probe_instance_owner.free(p_rid); + } else if (decal_instance_owner.owns(p_rid)) { + decal_instance_owner.free(p_rid); + } else if (lightmap_instance_owner.owns(p_rid)) { + lightmap_instance_owner.free(p_rid); + } else if (gi_probe_instance_owner.owns(p_rid)) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_rid); + if (gi_probe->texture.is_valid()) { + RD::get_singleton()->free(gi_probe->texture); + RD::get_singleton()->free(gi_probe->write_buffer); + } + + for (int i = 0; i < gi_probe->dynamic_maps.size(); i++) { + RD::get_singleton()->free(gi_probe->dynamic_maps[i].texture); + RD::get_singleton()->free(gi_probe->dynamic_maps[i].depth); + } + + gi_probe_instance_owner.free(p_rid); + } else if (sky_owner.owns(p_rid)) { + _update_dirty_skys(); + Sky *sky = sky_owner.getornull(p_rid); + + if (sky->radiance.is_valid()) { + RD::get_singleton()->free(sky->radiance); + sky->radiance = RID(); + } + _clear_reflection_data(sky->reflection); + + if (sky->uniform_buffer.is_valid()) { + RD::get_singleton()->free(sky->uniform_buffer); + sky->uniform_buffer = RID(); + } + + if (sky->half_res_pass.is_valid()) { + RD::get_singleton()->free(sky->half_res_pass); + sky->half_res_pass = RID(); + } + + if (sky->quarter_res_pass.is_valid()) { + RD::get_singleton()->free(sky->quarter_res_pass); + sky->quarter_res_pass = RID(); + } + + if (sky->material.is_valid()) { + storage->free(sky->material); + } + + sky_owner.free(p_rid); + } else if (light_instance_owner.owns(p_rid)) { + LightInstance *light_instance = light_instance_owner.getornull(p_rid); + + //remove from shadow atlases.. + for (Set<RID>::Element *E = light_instance->shadow_atlases.front(); E; E = E->next()) { + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(E->get()); + ERR_CONTINUE(!shadow_atlas->shadow_owners.has(p_rid)); + uint32_t key = shadow_atlas->shadow_owners[p_rid]; + uint32_t q = (key >> ShadowAtlas::QUADRANT_SHIFT) & 0x3; + uint32_t s = key & ShadowAtlas::SHADOW_INDEX_MASK; + + shadow_atlas->quadrants[q].shadows.write[s].owner = RID(); + shadow_atlas->shadow_owners.erase(p_rid); + } + + light_instance_owner.free(p_rid); + + } else if (shadow_atlas_owner.owns(p_rid)) { + shadow_atlas_set_size(p_rid, 0); + shadow_atlas_owner.free(p_rid); + + } else { + return false; + } + + return true; +} + +void RendererSceneRenderRD::set_debug_draw_mode(RS::ViewportDebugDraw p_debug_draw) { + debug_draw = p_debug_draw; +} + +void RendererSceneRenderRD::update() { + _update_dirty_skys(); +} + +void RendererSceneRenderRD::set_time(double p_time, double p_step) { + time = p_time; + time_step = p_step; +} + +void RendererSceneRenderRD::screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_limit) { + screen_space_roughness_limiter = p_enable; + screen_space_roughness_limiter_amount = p_amount; + screen_space_roughness_limiter_limit = p_limit; +} + +bool RendererSceneRenderRD::screen_space_roughness_limiter_is_active() const { + return screen_space_roughness_limiter; +} + +float RendererSceneRenderRD::screen_space_roughness_limiter_get_amount() const { + return screen_space_roughness_limiter_amount; +} + +float RendererSceneRenderRD::screen_space_roughness_limiter_get_limit() const { + return screen_space_roughness_limiter_limit; +} + +TypedArray<Image> RendererSceneRenderRD::bake_render_uv2(RID p_base, const Vector<RID> &p_material_overrides, const Size2i &p_image_size) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tf.width = p_image_size.width; // Always 64x64 + tf.height = p_image_size.height; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + + RID albedo_alpha_tex = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RID normal_tex = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RID orm_tex = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + RID emission_tex = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + RID depth_write_tex = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; + RID depth_tex = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + Vector<RID> fb_tex; + fb_tex.push_back(albedo_alpha_tex); + fb_tex.push_back(normal_tex); + fb_tex.push_back(orm_tex); + fb_tex.push_back(emission_tex); + fb_tex.push_back(depth_write_tex); + fb_tex.push_back(depth_tex); + + RID fb = RD::get_singleton()->framebuffer_create(fb_tex); + + //RID sampled_light; + + GeometryInstance *gi = geometry_instance_create(p_base); + + uint32_t sc = RSG::storage->mesh_get_surface_count(p_base); + Vector<RID> materials; + materials.resize(sc); + + for (uint32_t i = 0; i < sc; i++) { + if (i < (uint32_t)p_material_overrides.size()) { + materials.write[i] = p_material_overrides[i]; + } + } + + geometry_instance_set_surface_materials(gi, materials); + + if (cull_argument.size() == 0) { + cull_argument.push_back(nullptr); + } + cull_argument[0] = gi; + _render_uv2(cull_argument, fb, Rect2i(0, 0, p_image_size.width, p_image_size.height)); + + geometry_instance_free(gi); + + TypedArray<Image> ret; + + { + PackedByteArray data = RD::get_singleton()->texture_get_data(albedo_alpha_tex, 0); + Ref<Image> img; + img.instance(); + img->create(p_image_size.width, p_image_size.height, false, Image::FORMAT_RGBA8, data); + RD::get_singleton()->free(albedo_alpha_tex); + ret.push_back(img); + } + + { + PackedByteArray data = RD::get_singleton()->texture_get_data(normal_tex, 0); + Ref<Image> img; + img.instance(); + img->create(p_image_size.width, p_image_size.height, false, Image::FORMAT_RGBA8, data); + RD::get_singleton()->free(normal_tex); + ret.push_back(img); + } + + { + PackedByteArray data = RD::get_singleton()->texture_get_data(orm_tex, 0); + Ref<Image> img; + img.instance(); + img->create(p_image_size.width, p_image_size.height, false, Image::FORMAT_RGBA8, data); + RD::get_singleton()->free(orm_tex); + ret.push_back(img); + } + + { + PackedByteArray data = RD::get_singleton()->texture_get_data(emission_tex, 0); + Ref<Image> img; + img.instance(); + img->create(p_image_size.width, p_image_size.height, false, Image::FORMAT_RGBAH, data); + RD::get_singleton()->free(emission_tex); + ret.push_back(img); + } + + RD::get_singleton()->free(depth_write_tex); + RD::get_singleton()->free(depth_tex); + + return ret; +} + +void RendererSceneRenderRD::sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir) { + sdfgi_debug_probe_pos = p_position; + sdfgi_debug_probe_dir = p_dir; +} + +RendererSceneRenderRD *RendererSceneRenderRD::singleton = nullptr; + +RID RendererSceneRenderRD::get_reflection_probe_buffer() { + return cluster.reflection_buffer; +} +RID RendererSceneRenderRD::get_omni_light_buffer() { + return cluster.omni_light_buffer; +} + +RID RendererSceneRenderRD::get_spot_light_buffer() { + return cluster.spot_light_buffer; +} + +RID RendererSceneRenderRD::get_directional_light_buffer() { + return cluster.directional_light_buffer; +} +RID RendererSceneRenderRD::get_decal_buffer() { + return cluster.decal_buffer; +} +int RendererSceneRenderRD::get_max_directional_lights() const { + return cluster.max_directional_lights; +} + +bool RendererSceneRenderRD::is_low_end() const { + return low_end; +} + +RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { + max_cluster_elements = GLOBAL_GET("rendering/limits/cluster_builder/max_clustered_elements"); + + storage = p_storage; + singleton = this; + + roughness_layers = GLOBAL_GET("rendering/reflections/sky_reflections/roughness_layers"); + sky_ggx_samples_quality = GLOBAL_GET("rendering/reflections/sky_reflections/ggx_samples"); + sky_use_cubemap_array = GLOBAL_GET("rendering/reflections/sky_reflections/texture_array_reflections"); + + sdfgi_ray_count = RS::EnvironmentSDFGIRayCount(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/probe_ray_count")), 0, int32_t(RS::ENV_SDFGI_RAY_COUNT_MAX - 1))); + sdfgi_frames_to_converge = RS::EnvironmentSDFGIFramesToConverge(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/frames_to_converge")), 0, int32_t(RS::ENV_SDFGI_CONVERGE_MAX - 1))); + sdfgi_frames_to_update_light = RS::EnvironmentSDFGIFramesToUpdateLight(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/frames_to_update_lights")), 0, int32_t(RS::ENV_SDFGI_UPDATE_LIGHT_MAX - 1))); + + directional_shadow.size = GLOBAL_GET("rendering/shadows/directional_shadow/size"); + directional_shadow.use_16_bits = GLOBAL_GET("rendering/shadows/directional_shadow/16_bits"); + + uint32_t textures_per_stage = RD::get_singleton()->limit_get(RD::LIMIT_MAX_TEXTURES_PER_SHADER_STAGE); + + low_end = GLOBAL_GET("rendering/driver/rd_renderer/use_low_end_renderer"); + + if (textures_per_stage < 48) { + low_end = true; + } + + if (!low_end) { + //kinda complicated to compute the amount of slots, we try to use as many as we can + + gi_probe_max_lights = 32; + + gi_probe_lights = memnew_arr(GIProbeLight, gi_probe_max_lights); + gi_probe_lights_uniform = RD::get_singleton()->uniform_buffer_create(gi_probe_max_lights * sizeof(GIProbeLight)); + gi_probe_quality = RS::GIProbeQuality(CLAMP(int(GLOBAL_GET("rendering/global_illumination/gi_probes/quality")), 0, 1)); + + String defines = "\n#define MAX_LIGHTS " + itos(gi_probe_max_lights) + "\n"; + + Vector<String> versions; + versions.push_back("\n#define MODE_COMPUTE_LIGHT\n"); + versions.push_back("\n#define MODE_SECOND_BOUNCE\n"); + versions.push_back("\n#define MODE_UPDATE_MIPMAPS\n"); + versions.push_back("\n#define MODE_WRITE_TEXTURE\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_LIGHTING\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_WRITE\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_PLOT\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_PLOT\n#define MODE_DYNAMIC_SHRINK_WRITE\n"); + + giprobe_shader.initialize(versions, defines); + giprobe_lighting_shader_version = giprobe_shader.version_create(); + for (int i = 0; i < GI_PROBE_SHADER_VERSION_MAX; i++) { + giprobe_lighting_shader_version_shaders[i] = giprobe_shader.version_get_shader(giprobe_lighting_shader_version, i); + giprobe_lighting_shader_version_pipelines[i] = RD::get_singleton()->compute_pipeline_create(giprobe_lighting_shader_version_shaders[i]); + } + } + + if (!low_end) { + String defines; + Vector<String> versions; + versions.push_back("\n#define MODE_DEBUG_COLOR\n"); + versions.push_back("\n#define MODE_DEBUG_LIGHT\n"); + versions.push_back("\n#define MODE_DEBUG_EMISSION\n"); + versions.push_back("\n#define MODE_DEBUG_LIGHT\n#define MODE_DEBUG_LIGHT_FULL\n"); + + giprobe_debug_shader.initialize(versions, defines); + giprobe_debug_shader_version = giprobe_debug_shader.version_create(); + for (int i = 0; i < GI_PROBE_DEBUG_MAX; i++) { + giprobe_debug_shader_version_shaders[i] = giprobe_debug_shader.version_get_shader(giprobe_debug_shader_version, i); + + RD::PipelineRasterizationState rs; + rs.cull_mode = RD::POLYGON_CULL_FRONT; + RD::PipelineDepthStencilState ds; + ds.enable_depth_test = true; + ds.enable_depth_write = true; + ds.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; + + giprobe_debug_shader_version_pipelines[i].setup(giprobe_debug_shader_version_shaders[i], RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); + } + } + + /* SKY SHADER */ + + { + // Start with the directional lights for the sky + sky_scene_state.max_directional_lights = 4; + uint32_t directional_light_buffer_size = sky_scene_state.max_directional_lights * sizeof(SkyDirectionalLightData); + sky_scene_state.directional_lights = memnew_arr(SkyDirectionalLightData, sky_scene_state.max_directional_lights); + sky_scene_state.last_frame_directional_lights = memnew_arr(SkyDirectionalLightData, sky_scene_state.max_directional_lights); + sky_scene_state.last_frame_directional_light_count = sky_scene_state.max_directional_lights + 1; + sky_scene_state.directional_light_buffer = RD::get_singleton()->uniform_buffer_create(directional_light_buffer_size); + + String defines = "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(sky_scene_state.max_directional_lights) + "\n"; + + // Initialize sky + Vector<String> sky_modes; + sky_modes.push_back(""); // Full size + sky_modes.push_back("\n#define USE_HALF_RES_PASS\n"); // Half Res + sky_modes.push_back("\n#define USE_QUARTER_RES_PASS\n"); // Quarter res + sky_modes.push_back("\n#define USE_CUBEMAP_PASS\n"); // Cubemap + sky_modes.push_back("\n#define USE_CUBEMAP_PASS\n#define USE_HALF_RES_PASS\n"); // Half Res Cubemap + sky_modes.push_back("\n#define USE_CUBEMAP_PASS\n#define USE_QUARTER_RES_PASS\n"); // Quarter res Cubemap + sky_shader.shader.initialize(sky_modes, defines); + } + + // register our shader funds + storage->shader_set_data_request_function(RendererStorageRD::SHADER_TYPE_SKY, _create_sky_shader_funcs); + storage->material_set_data_request_function(RendererStorageRD::SHADER_TYPE_SKY, _create_sky_material_funcs); + + { + ShaderCompilerRD::DefaultIdentifierActions actions; + + actions.renames["COLOR"] = "color"; + actions.renames["ALPHA"] = "alpha"; + actions.renames["EYEDIR"] = "cube_normal"; + actions.renames["POSITION"] = "params.position_multiplier.xyz"; + actions.renames["SKY_COORDS"] = "panorama_coords"; + actions.renames["SCREEN_UV"] = "uv"; + actions.renames["TIME"] = "params.time"; + actions.renames["HALF_RES_COLOR"] = "half_res_color"; + actions.renames["QUARTER_RES_COLOR"] = "quarter_res_color"; + actions.renames["RADIANCE"] = "radiance"; + actions.renames["FOG"] = "custom_fog"; + actions.renames["LIGHT0_ENABLED"] = "directional_lights.data[0].enabled"; + actions.renames["LIGHT0_DIRECTION"] = "directional_lights.data[0].direction_energy.xyz"; + actions.renames["LIGHT0_ENERGY"] = "directional_lights.data[0].direction_energy.w"; + actions.renames["LIGHT0_COLOR"] = "directional_lights.data[0].color_size.xyz"; + actions.renames["LIGHT0_SIZE"] = "directional_lights.data[0].color_size.w"; + actions.renames["LIGHT1_ENABLED"] = "directional_lights.data[1].enabled"; + actions.renames["LIGHT1_DIRECTION"] = "directional_lights.data[1].direction_energy.xyz"; + actions.renames["LIGHT1_ENERGY"] = "directional_lights.data[1].direction_energy.w"; + actions.renames["LIGHT1_COLOR"] = "directional_lights.data[1].color_size.xyz"; + actions.renames["LIGHT1_SIZE"] = "directional_lights.data[1].color_size.w"; + actions.renames["LIGHT2_ENABLED"] = "directional_lights.data[2].enabled"; + actions.renames["LIGHT2_DIRECTION"] = "directional_lights.data[2].direction_energy.xyz"; + actions.renames["LIGHT2_ENERGY"] = "directional_lights.data[2].direction_energy.w"; + actions.renames["LIGHT2_COLOR"] = "directional_lights.data[2].color_size.xyz"; + actions.renames["LIGHT2_SIZE"] = "directional_lights.data[2].color_size.w"; + actions.renames["LIGHT3_ENABLED"] = "directional_lights.data[3].enabled"; + actions.renames["LIGHT3_DIRECTION"] = "directional_lights.data[3].direction_energy.xyz"; + actions.renames["LIGHT3_ENERGY"] = "directional_lights.data[3].direction_energy.w"; + actions.renames["LIGHT3_COLOR"] = "directional_lights.data[3].color_size.xyz"; + actions.renames["LIGHT3_SIZE"] = "directional_lights.data[3].color_size.w"; + actions.renames["AT_CUBEMAP_PASS"] = "AT_CUBEMAP_PASS"; + actions.renames["AT_HALF_RES_PASS"] = "AT_HALF_RES_PASS"; + actions.renames["AT_QUARTER_RES_PASS"] = "AT_QUARTER_RES_PASS"; + actions.custom_samplers["RADIANCE"] = "material_samplers[3]"; + actions.usage_defines["HALF_RES_COLOR"] = "\n#define USES_HALF_RES_COLOR\n"; + actions.usage_defines["QUARTER_RES_COLOR"] = "\n#define USES_QUARTER_RES_COLOR\n"; + actions.render_mode_defines["disable_fog"] = "#define DISABLE_FOG\n"; + + actions.sampler_array_name = "material_samplers"; + actions.base_texture_binding_index = 1; + actions.texture_layout_set = 1; + actions.base_uniform_string = "material."; + actions.base_varying_index = 10; + + actions.default_filter = ShaderLanguage::FILTER_LINEAR_MIPMAP; + actions.default_repeat = ShaderLanguage::REPEAT_ENABLE; + actions.global_buffer_array_variable = "global_variables.data"; + + sky_shader.compiler.initialize(actions); + } + + { + // default material and shader for sky shader + sky_shader.default_shader = storage->shader_allocate(); + storage->shader_initialize(sky_shader.default_shader); + + storage->shader_set_code(sky_shader.default_shader, "shader_type sky; void fragment() { COLOR = vec3(0.0); } \n"); + + sky_shader.default_material = storage->material_allocate(); + storage->material_initialize(sky_shader.default_material); + + storage->material_set_shader(sky_shader.default_material, sky_shader.default_shader); + + SkyMaterialData *md = (SkyMaterialData *)storage->material_get_data(sky_shader.default_material, RendererStorageRD::SHADER_TYPE_SKY); + sky_shader.default_shader_rd = sky_shader.shader.version_get_shader(md->shader_data->version, SKY_VERSION_BACKGROUND); + + sky_scene_state.uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SkySceneState::UBO)); + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 0; + u.ids.resize(12); + RID *ids_ptr = u.ids.ptrw(); + ids_ptr[0] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[1] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[2] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[3] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[4] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[5] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[6] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[7] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[8] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[9] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[10] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[11] = storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(storage->global_variables_get_storage_buffer()); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(sky_scene_state.uniform_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(sky_scene_state.directional_light_buffer); + uniforms.push_back(u); + } + + sky_scene_state.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sky_shader.default_shader_rd, SKY_SET_UNIFORMS); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + RID vfog = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); + u.ids.push_back(vfog); + uniforms.push_back(u); + } + + sky_scene_state.default_fog_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sky_shader.default_shader_rd, SKY_SET_FOG); + } + + { + // Need defaults for using fog with clear color + sky_scene_state.fog_shader = storage->shader_allocate(); + storage->shader_initialize(sky_scene_state.fog_shader); + + storage->shader_set_code(sky_scene_state.fog_shader, "shader_type sky; uniform vec4 clear_color; void fragment() { COLOR = clear_color.rgb; } \n"); + sky_scene_state.fog_material = storage->material_allocate(); + storage->material_initialize(sky_scene_state.fog_material); + + storage->material_set_shader(sky_scene_state.fog_material, sky_scene_state.fog_shader); + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1; + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 2; + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE)); + uniforms.push_back(u); + } + + sky_scene_state.fog_only_texture_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sky_shader.default_shader_rd, SKY_SET_TEXTURES); + } + + if (!low_end) { + //SDFGI + { + Vector<String> preprocess_modes; + preprocess_modes.push_back("\n#define MODE_SCROLL\n"); + preprocess_modes.push_back("\n#define MODE_SCROLL_OCCLUSION\n"); + preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD\n"); + preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD_HALF\n"); + preprocess_modes.push_back("\n#define MODE_JUMPFLOOD\n"); + preprocess_modes.push_back("\n#define MODE_JUMPFLOOD_OPTIMIZED\n"); + preprocess_modes.push_back("\n#define MODE_UPSCALE_JUMP_FLOOD\n"); + preprocess_modes.push_back("\n#define MODE_OCCLUSION\n"); + preprocess_modes.push_back("\n#define MODE_STORE\n"); + String defines = "\n#define OCCLUSION_SIZE " + itos(SDFGI::CASCADE_SIZE / SDFGI::PROBE_DIVISOR) + "\n"; + sdfgi_shader.preprocess.initialize(preprocess_modes, defines); + sdfgi_shader.preprocess_shader = sdfgi_shader.preprocess.version_create(); + for (int i = 0; i < SDGIShader::PRE_PROCESS_MAX; i++) { + sdfgi_shader.preprocess_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, i)); + } + } + + { + //calculate tables + String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + + Vector<String> direct_light_modes; + direct_light_modes.push_back("\n#define MODE_PROCESS_STATIC\n"); + direct_light_modes.push_back("\n#define MODE_PROCESS_DYNAMIC\n"); + sdfgi_shader.direct_light.initialize(direct_light_modes, defines); + sdfgi_shader.direct_light_shader = sdfgi_shader.direct_light.version_create(); + for (int i = 0; i < SDGIShader::DIRECT_LIGHT_MODE_MAX; i++) { + sdfgi_shader.direct_light_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.direct_light.version_get_shader(sdfgi_shader.direct_light_shader, i)); + } + } + + { + //calculate tables + String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + defines += "\n#define SH_SIZE " + itos(SDFGI::SH_SIZE) + "\n"; + if (sky_use_cubemap_array) { + defines += "\n#define USE_CUBEMAP_ARRAY\n"; + } + + Vector<String> integrate_modes; + integrate_modes.push_back("\n#define MODE_PROCESS\n"); + integrate_modes.push_back("\n#define MODE_STORE\n"); + integrate_modes.push_back("\n#define MODE_SCROLL\n"); + integrate_modes.push_back("\n#define MODE_SCROLL_STORE\n"); + sdfgi_shader.integrate.initialize(integrate_modes, defines); + sdfgi_shader.integrate_shader = sdfgi_shader.integrate.version_create(); + + for (int i = 0; i < SDGIShader::INTEGRATE_MODE_MAX; i++) { + sdfgi_shader.integrate_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, i)); + } + + { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + u.ids.push_back(storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_WHITE)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + sdfgi_shader.integrate_default_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); + } + } + //GK + { + //calculate tables + String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + Vector<String> gi_modes; + gi_modes.push_back("\n#define USE_GIPROBES\n"); + gi_modes.push_back("\n#define USE_SDFGI\n"); + gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_GIPROBES\n"); + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_GIPROBES\n"); + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n"); + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n\n#define USE_GIPROBES\n"); + + gi.shader.initialize(gi_modes, defines); + gi.shader_version = gi.shader.version_create(); + for (int i = 0; i < GI::MODE_MAX; i++) { + gi.pipelines[i] = RD::get_singleton()->compute_pipeline_create(gi.shader.version_get_shader(gi.shader_version, i)); + } + + gi.sdfgi_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(GI::SDFGIData)); + } + { + String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + Vector<String> debug_modes; + debug_modes.push_back(""); + sdfgi_shader.debug.initialize(debug_modes, defines); + sdfgi_shader.debug_shader = sdfgi_shader.debug.version_create(); + sdfgi_shader.debug_shader_version = sdfgi_shader.debug.version_get_shader(sdfgi_shader.debug_shader, 0); + sdfgi_shader.debug_pipeline = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.debug_shader_version); + } + { + String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + + Vector<String> versions; + versions.push_back("\n#define MODE_PROBES\n"); + versions.push_back("\n#define MODE_VISIBILITY\n"); + + sdfgi_shader.debug_probes.initialize(versions, defines); + sdfgi_shader.debug_probes_shader = sdfgi_shader.debug_probes.version_create(); + + { + RD::PipelineRasterizationState rs; + rs.cull_mode = RD::POLYGON_CULL_DISABLED; + RD::PipelineDepthStencilState ds; + ds.enable_depth_test = true; + ds.enable_depth_write = true; + ds.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; + for (int i = 0; i < SDGIShader::PROBE_DEBUG_MAX; i++) { + RID debug_probes_shader_version = sdfgi_shader.debug_probes.version_get_shader(sdfgi_shader.debug_probes_shader, i); + sdfgi_shader.debug_probes_pipeline[i].setup(debug_probes_shader_version, RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); + } + } + } + default_giprobe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GI::GIProbeData) * RenderBuffers::MAX_GIPROBES); + } + + { //decals + cluster.max_decals = max_cluster_elements; + uint32_t decal_buffer_size = cluster.max_decals * sizeof(Cluster::DecalData); + cluster.decals = memnew_arr(Cluster::DecalData, cluster.max_decals); + cluster.decal_sort = memnew_arr(Cluster::InstanceSort<DecalInstance>, cluster.max_decals); + cluster.decal_buffer = RD::get_singleton()->storage_buffer_create(decal_buffer_size); + } + + { //reflections + + cluster.max_reflections = max_cluster_elements; + cluster.reflections = memnew_arr(Cluster::ReflectionData, cluster.max_reflections); + cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_reflections); + cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(sizeof(Cluster::ReflectionData) * cluster.max_reflections); + } + + { //lights + cluster.max_lights = max_cluster_elements; + + uint32_t light_buffer_size = cluster.max_lights * sizeof(Cluster::LightData); + cluster.omni_lights = memnew_arr(Cluster::LightData, cluster.max_lights); + cluster.omni_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.omni_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights); + cluster.spot_lights = memnew_arr(Cluster::LightData, cluster.max_lights); + cluster.spot_light_buffer = RD::get_singleton()->storage_buffer_create(light_buffer_size); + cluster.spot_light_sort = memnew_arr(Cluster::InstanceSort<LightInstance>, cluster.max_lights); + //defines += "\n#define MAX_LIGHT_DATA_STRUCTS " + itos(cluster.max_lights) + "\n"; + + cluster.max_directional_lights = MAX_DIRECTIONAL_LIGHTS; + uint32_t directional_light_buffer_size = cluster.max_directional_lights * sizeof(Cluster::DirectionalLightData); + cluster.directional_lights = memnew_arr(Cluster::DirectionalLightData, cluster.max_directional_lights); + cluster.directional_light_buffer = RD::get_singleton()->uniform_buffer_create(directional_light_buffer_size); + } + + if (!low_end) { + String defines = "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(cluster.max_directional_lights) + "\n"; + Vector<String> volumetric_fog_modes; + volumetric_fog_modes.push_back("\n#define MODE_DENSITY\n"); + volumetric_fog_modes.push_back("\n#define MODE_DENSITY\n#define ENABLE_SDFGI\n"); + volumetric_fog_modes.push_back("\n#define MODE_FILTER\n"); + volumetric_fog_modes.push_back("\n#define MODE_FOG\n"); + volumetric_fog.shader.initialize(volumetric_fog_modes, defines); + volumetric_fog.shader_version = volumetric_fog.shader.version_create(); + for (int i = 0; i < VOLUMETRIC_FOG_SHADER_MAX; i++) { + volumetric_fog.pipelines[i] = RD::get_singleton()->compute_pipeline_create(volumetric_fog.shader.version_get_shader(volumetric_fog.shader_version, i)); + } + volumetric_fog.params_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(VolumetricFogShader::ParamsUBO)); + } + + { + RD::SamplerState sampler; + sampler.mag_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.min_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.enable_compare = true; + sampler.compare_op = RD::COMPARE_OP_LESS; + shadow_sampler = RD::get_singleton()->sampler_create(sampler); + } + + camera_effects_set_dof_blur_bokeh_shape(RS::DOFBokehShape(int(GLOBAL_GET("rendering/camera/depth_of_field/depth_of_field_bokeh_shape")))); + camera_effects_set_dof_blur_quality(RS::DOFBlurQuality(int(GLOBAL_GET("rendering/camera/depth_of_field/depth_of_field_bokeh_quality"))), GLOBAL_GET("rendering/camera/depth_of_field/depth_of_field_use_jitter")); + environment_set_ssao_quality(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/environment/ssao/quality"))), GLOBAL_GET("rendering/environment/ssao/half_size"), GLOBAL_GET("rendering/environment/ssao/adaptive_target"), GLOBAL_GET("rendering/environment/ssao/blur_passes"), GLOBAL_GET("rendering/environment/ssao/fadeout_from"), GLOBAL_GET("rendering/environment/ssao/fadeout_to")); + screen_space_roughness_limiter = GLOBAL_GET("rendering/anti_aliasing/screen_space_roughness_limiter/enabled"); + screen_space_roughness_limiter_amount = GLOBAL_GET("rendering/anti_aliasing/screen_space_roughness_limiter/amount"); + screen_space_roughness_limiter_limit = GLOBAL_GET("rendering/anti_aliasing/screen_space_roughness_limiter/limit"); + glow_bicubic_upscale = int(GLOBAL_GET("rendering/environment/glow/upscale_mode")) > 0; + glow_high_quality = GLOBAL_GET("rendering/environment/glow/use_high_quality"); + ssr_roughness_quality = RS::EnvironmentSSRRoughnessQuality(int(GLOBAL_GET("rendering/environment/screen_space_reflection/roughness_quality"))); + sss_quality = RS::SubSurfaceScatteringQuality(int(GLOBAL_GET("rendering/environment/subsurface_scattering/subsurface_scattering_quality"))); + sss_scale = GLOBAL_GET("rendering/environment/subsurface_scattering/subsurface_scattering_scale"); + sss_depth_scale = GLOBAL_GET("rendering/environment/subsurface_scattering/subsurface_scattering_depth_scale"); + directional_penumbra_shadow_kernel = memnew_arr(float, 128); + directional_soft_shadow_kernel = memnew_arr(float, 128); + penumbra_shadow_kernel = memnew_arr(float, 128); + soft_shadow_kernel = memnew_arr(float, 128); + shadows_quality_set(RS::ShadowQuality(int(GLOBAL_GET("rendering/shadows/shadows/soft_shadow_quality")))); + directional_shadow_quality_set(RS::ShadowQuality(int(GLOBAL_GET("rendering/shadows/directional_shadow/soft_shadow_quality")))); + + environment_set_volumetric_fog_volume_size(GLOBAL_GET("rendering/environment/volumetric_fog/volume_size"), GLOBAL_GET("rendering/environment/volumetric_fog/volume_depth")); + environment_set_volumetric_fog_filter_active(GLOBAL_GET("rendering/environment/volumetric_fog/use_filter")); + + cull_argument.set_page_pool(&cull_argument_pool); + + gi.half_resolution = GLOBAL_GET("rendering/global_illumination/gi/use_half_resolution"); +} + +RendererSceneRenderRD::~RendererSceneRenderRD() { + for (Map<int, ShadowCubemap>::Element *E = shadow_cubemaps.front(); E; E = E->next()) { + RD::get_singleton()->free(E->get().cubemap); + } + + if (sky_scene_state.uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(sky_scene_state.uniform_set)) { + RD::get_singleton()->free(sky_scene_state.uniform_set); + } + + if (!low_end) { + RD::get_singleton()->free(default_giprobe_buffer); + RD::get_singleton()->free(gi_probe_lights_uniform); + RD::get_singleton()->free(gi.sdfgi_ubo); + + giprobe_debug_shader.version_free(giprobe_debug_shader_version); + giprobe_shader.version_free(giprobe_lighting_shader_version); + gi.shader.version_free(gi.shader_version); + sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader); + sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader); + sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader); + sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader); + sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); + + volumetric_fog.shader.version_free(volumetric_fog.shader_version); + RD::get_singleton()->free(volumetric_fog.params_ubo); + + memdelete_arr(gi_probe_lights); + } + + SkyMaterialData *md = (SkyMaterialData *)storage->material_get_data(sky_shader.default_material, RendererStorageRD::SHADER_TYPE_SKY); + sky_shader.shader.version_free(md->shader_data->version); + RD::get_singleton()->free(sky_scene_state.directional_light_buffer); + RD::get_singleton()->free(sky_scene_state.uniform_buffer); + memdelete_arr(sky_scene_state.directional_lights); + memdelete_arr(sky_scene_state.last_frame_directional_lights); + storage->free(sky_shader.default_shader); + storage->free(sky_shader.default_material); + storage->free(sky_scene_state.fog_shader); + storage->free(sky_scene_state.fog_material); + memdelete_arr(directional_penumbra_shadow_kernel); + memdelete_arr(directional_soft_shadow_kernel); + memdelete_arr(penumbra_shadow_kernel); + memdelete_arr(soft_shadow_kernel); + + { + RD::get_singleton()->free(cluster.directional_light_buffer); + RD::get_singleton()->free(cluster.omni_light_buffer); + RD::get_singleton()->free(cluster.spot_light_buffer); + RD::get_singleton()->free(cluster.reflection_buffer); + RD::get_singleton()->free(cluster.decal_buffer); + memdelete_arr(cluster.directional_lights); + memdelete_arr(cluster.omni_lights); + memdelete_arr(cluster.spot_lights); + memdelete_arr(cluster.omni_light_sort); + memdelete_arr(cluster.spot_light_sort); + memdelete_arr(cluster.reflections); + memdelete_arr(cluster.reflection_sort); + memdelete_arr(cluster.decals); + memdelete_arr(cluster.decal_sort); + } + + RD::get_singleton()->free(shadow_sampler); + + directional_shadow_atlas_set_size(0); + cull_argument.reset(); //avoid exit error +} diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h new file mode 100644 index 0000000000..e4eaa93212 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -0,0 +1,2128 @@ +/*************************************************************************/ +/* renderer_scene_render_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef RENDERING_SERVER_SCENE_RENDER_RD_H +#define RENDERING_SERVER_SCENE_RENDER_RD_H + +#include "core/templates/local_vector.h" +#include "core/templates/rid_owner.h" +#include "servers/rendering/renderer_compositor.h" +#include "servers/rendering/renderer_rd/cluster_builder_rd.h" +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/shaders/gi.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/giprobe.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/giprobe_debug.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/sky.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/volumetric_fog.glsl.gen.h" +#include "servers/rendering/renderer_scene_render.h" +#include "servers/rendering/rendering_device.h" + +class RendererSceneRenderRD : public RendererSceneRender { +protected: + double time; + + // Skys need less info from Directional Lights than the normal shaders + struct SkyDirectionalLightData { + float direction[3]; + float energy; + float color[3]; + float size; + uint32_t enabled; + uint32_t pad[3]; + }; + + struct SkySceneState { + struct UBO { + uint32_t volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + + float fog_aerial_perspective; + + float fog_light_color[3]; + float fog_sun_scatter; + + uint32_t fog_enabled; + float fog_density; + + float z_far; + uint32_t directional_light_count; + }; + + UBO ubo; + + SkyDirectionalLightData *directional_lights; + SkyDirectionalLightData *last_frame_directional_lights; + uint32_t max_directional_lights; + uint32_t last_frame_directional_light_count; + RID directional_light_buffer; + RID uniform_set; + RID uniform_buffer; + RID fog_uniform_set; + RID default_fog_uniform_set; + + RID fog_shader; + RID fog_material; + RID fog_only_texture_uniform_set; + } sky_scene_state; + + struct RenderBufferData { + virtual void configure(RID p_color_buffer, RID p_depth_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa) = 0; + virtual ~RenderBufferData() {} + }; + virtual RenderBufferData *_create_render_buffer_data() = 0; + + void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count); + void _setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform); + void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment); + void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used); + + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; + + virtual void _render_shadow_begin() = 0; + virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0; + virtual void _render_shadow_process() = 0; + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL) = 0; + + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; + virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; + virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; + virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances) = 0; + + virtual void _debug_giprobe(RID p_gi_probe, RenderingDevice::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); + void _debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform); + + RenderBufferData *render_buffers_get_data(RID p_render_buffers); + + virtual void _base_uniforms_changed() = 0; + virtual void _render_buffers_uniform_set_changed(RID p_render_buffers) = 0; + virtual RID _render_buffers_get_normal_texture(RID p_render_buffers) = 0; + + void _process_ssao(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection); + void _process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive); + void _process_sss(RID p_render_buffers, const CameraMatrix &p_camera); + + void _setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size); + void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); + void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); + void _pre_process_gi(RID p_render_buffers, const Transform &p_transform); + void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes); + + bool _needs_post_prepass_render(bool p_use_gi); + void _post_prepass_render(bool p_use_gi); + void _pre_resolve_render(bool p_use_gi); + + void _pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer); + uint32_t _get_render_state_directional_light_count() const; + + // needed for a single argument calls (material and uv2) + PagedArrayPool<GeometryInstance *> cull_argument_pool; + PagedArray<GeometryInstance *> cull_argument; //need this to exist +private: + RS::ViewportDebugDraw debug_draw = RS::VIEWPORT_DEBUG_DRAW_DISABLED; + double time_step = 0; + static RendererSceneRenderRD *singleton; + + int roughness_layers; + + RendererStorageRD *storage; + + struct ReflectionData { + struct Layer { + struct Mipmap { + RID framebuffers[6]; + RID views[6]; + Size2i size; + }; + Vector<Mipmap> mipmaps; //per-face view + Vector<RID> views; // per-cubemap view + }; + + struct DownsampleLayer { + struct Mipmap { + RID view; + Size2i size; + }; + Vector<Mipmap> mipmaps; + }; + + RID radiance_base_cubemap; //cubemap for first layer, first cubemap + RID downsampled_radiance_cubemap; + DownsampleLayer downsampled_layer; + RID coefficient_buffer; + + bool dirty = true; + + Vector<Layer> layers; + }; + + void _clear_reflection_data(ReflectionData &rd); + void _update_reflection_data(ReflectionData &rd, int p_size, int p_mipmaps, bool p_use_array, RID p_base_cube, int p_base_layer, bool p_low_quality); + void _create_reflection_fast_filter(ReflectionData &rd, bool p_use_arrays); + void _create_reflection_importance_sample(ReflectionData &rd, bool p_use_arrays, int p_cube_side, int p_base_layer); + void _update_reflection_mipmaps(ReflectionData &rd, int p_start, int p_end); + + /* Sky shader */ + + enum SkyVersion { + SKY_VERSION_BACKGROUND, + SKY_VERSION_HALF_RES, + SKY_VERSION_QUARTER_RES, + SKY_VERSION_CUBEMAP, + SKY_VERSION_CUBEMAP_HALF_RES, + SKY_VERSION_CUBEMAP_QUARTER_RES, + SKY_VERSION_MAX + }; + + struct SkyShader { + SkyShaderRD shader; + ShaderCompilerRD compiler; + + RID default_shader; + RID default_material; + RID default_shader_rd; + } sky_shader; + + struct SkyShaderData : public RendererStorageRD::ShaderData { + bool valid; + RID version; + + PipelineCacheRD pipelines[SKY_VERSION_MAX]; + Map<StringName, ShaderLanguage::ShaderNode::Uniform> uniforms; + Vector<ShaderCompilerRD::GeneratedCode::Texture> texture_uniforms; + + Vector<uint32_t> ubo_offsets; + uint32_t ubo_size; + + String path; + String code; + Map<StringName, RID> default_texture_params; + + bool uses_time; + bool uses_position; + bool uses_half_res; + bool uses_quarter_res; + bool uses_light; + + virtual void set_code(const String &p_Code); + virtual void set_default_texture_param(const StringName &p_name, RID p_texture); + virtual void get_param_list(List<PropertyInfo> *p_param_list) const; + virtual void get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const; + virtual bool is_param_texture(const StringName &p_param) const; + virtual bool is_animated() const; + virtual bool casts_shadows() const; + virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + SkyShaderData(); + virtual ~SkyShaderData(); + }; + + RendererStorageRD::ShaderData *_create_sky_shader_func(); + static RendererStorageRD::ShaderData *_create_sky_shader_funcs() { + return static_cast<RendererSceneRenderRD *>(singleton)->_create_sky_shader_func(); + }; + + struct SkyMaterialData : public RendererStorageRD::MaterialData { + uint64_t last_frame; + SkyShaderData *shader_data; + RID uniform_buffer; + RID uniform_set; + Vector<RID> texture_cache; + Vector<uint8_t> ubo_data; + bool uniform_set_updated; + + virtual void set_render_priority(int p_priority) {} + virtual void set_next_pass(RID p_pass) {} + virtual void update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty); + virtual ~SkyMaterialData(); + }; + + RendererStorageRD::MaterialData *_create_sky_material_func(SkyShaderData *p_shader); + static RendererStorageRD::MaterialData *_create_sky_material_funcs(RendererStorageRD::ShaderData *p_shader) { + return static_cast<RendererSceneRenderRD *>(singleton)->_create_sky_material_func(static_cast<SkyShaderData *>(p_shader)); + }; + + enum SkyTextureSetVersion { + SKY_TEXTURE_SET_BACKGROUND, + SKY_TEXTURE_SET_HALF_RES, + SKY_TEXTURE_SET_QUARTER_RES, + SKY_TEXTURE_SET_CUBEMAP, + SKY_TEXTURE_SET_CUBEMAP_HALF_RES, + SKY_TEXTURE_SET_CUBEMAP_QUARTER_RES, + SKY_TEXTURE_SET_MAX + }; + + enum SkySet { + SKY_SET_UNIFORMS, + SKY_SET_MATERIAL, + SKY_SET_TEXTURES, + SKY_SET_FOG, + SKY_SET_MAX + }; + + /* SKY */ + struct Sky { + RID radiance; + RID half_res_pass; + RID half_res_framebuffer; + RID quarter_res_pass; + RID quarter_res_framebuffer; + Size2i screen_size; + + RID texture_uniform_sets[SKY_TEXTURE_SET_MAX]; + RID uniform_set; + + RID material; + RID uniform_buffer; + + int radiance_size = 256; + + RS::SkyMode mode = RS::SKY_MODE_AUTOMATIC; + + ReflectionData reflection; + bool dirty = false; + int processing_layer = 0; + Sky *dirty_list = nullptr; + + //State to track when radiance cubemap needs updating + SkyMaterialData *prev_material; + Vector3 prev_position; + float prev_time; + + RID sdfgi_integrate_sky_uniform_set; + }; + + Sky *dirty_sky_list = nullptr; + + void _sky_invalidate(Sky *p_sky); + void _update_dirty_skys(); + RID _get_sky_textures(Sky *p_sky, SkyTextureSetVersion p_version); + + uint32_t sky_ggx_samples_quality; + bool sky_use_cubemap_array; + + mutable RID_Owner<Sky, true> sky_owner; + + /* REFLECTION ATLAS */ + + struct ReflectionAtlas { + int count = 0; + int size = 0; + + RID reflection; + RID depth_buffer; + RID depth_fb; + + struct Reflection { + RID owner; + ReflectionData data; + RID fbs[6]; + }; + + Vector<Reflection> reflections; + + ClusterBuilderRD *cluster_builder = nullptr; + }; + + mutable RID_Owner<ReflectionAtlas> reflection_atlas_owner; + + /* REFLECTION PROBE INSTANCE */ + + struct ReflectionProbeInstance { + RID probe; + int atlas_index = -1; + RID atlas; + + bool dirty = true; + bool rendering = false; + int processing_layer = 1; + int processing_side = 0; + + uint32_t render_step = 0; + uint64_t last_pass = 0; + uint32_t render_index = 0; + + Transform transform; + }; + + mutable RID_Owner<ReflectionProbeInstance> reflection_probe_instance_owner; + + /* DECAL INSTANCE */ + + struct DecalInstance { + RID decal; + Transform transform; + }; + + mutable RID_Owner<DecalInstance> decal_instance_owner; + + /* LIGHTMAP INSTANCE */ + + struct LightmapInstance { + RID lightmap; + Transform transform; + }; + + mutable RID_Owner<LightmapInstance> lightmap_instance_owner; + + /* GIPROBE INSTANCE */ + + struct GIProbeLight { + uint32_t type; + float energy; + float radius; + float attenuation; + + float color[3]; + float cos_spot_angle; + + float position[3]; + float inv_spot_attenuation; + + float direction[3]; + uint32_t has_shadow; + }; + + struct GIProbePushConstant { + int32_t limits[3]; + uint32_t stack_size; + + float emission_scale; + float propagation; + float dynamic_range; + uint32_t light_count; + + uint32_t cell_offset; + uint32_t cell_count; + float aniso_strength; + uint32_t pad; + }; + + struct GIProbeDynamicPushConstant { + int32_t limits[3]; + uint32_t light_count; + int32_t x_dir[3]; + float z_base; + int32_t y_dir[3]; + float z_sign; + int32_t z_dir[3]; + float pos_multiplier; + uint32_t rect_pos[2]; + uint32_t rect_size[2]; + uint32_t prev_rect_ofs[2]; + uint32_t prev_rect_size[2]; + uint32_t flip_x; + uint32_t flip_y; + float dynamic_range; + uint32_t on_mipmap; + float propagation; + float pad[3]; + }; + + struct GIProbeInstance { + RID probe; + RID texture; + RID write_buffer; + + struct Mipmap { + RID texture; + RID uniform_set; + RID second_bounce_uniform_set; + RID write_uniform_set; + uint32_t level; + uint32_t cell_offset; + uint32_t cell_count; + }; + Vector<Mipmap> mipmaps; + + struct DynamicMap { + RID texture; //color normally, or emission on first pass + RID fb_depth; //actual depth buffer for the first pass, float depth for later passes + RID depth; //actual depth buffer for the first pass, float depth for later passes + RID normal; //normal buffer for the first pass + RID albedo; //emission buffer for the first pass + RID orm; //orm buffer for the first pass + RID fb; //used for rendering, only valid on first map + RID uniform_set; + uint32_t size; + int mipmap; // mipmap to write to, -1 if no mipmap assigned + }; + + Vector<DynamicMap> dynamic_maps; + + int slot = -1; + uint32_t last_probe_version = 0; + uint32_t last_probe_data_version = 0; + + //uint64_t last_pass = 0; + uint32_t render_index = 0; + + bool has_dynamic_object_data = false; + + Transform transform; + }; + + GIProbeLight *gi_probe_lights; + uint32_t gi_probe_max_lights; + RID gi_probe_lights_uniform; + + enum { + GI_PROBE_SHADER_VERSION_COMPUTE_LIGHT, + GI_PROBE_SHADER_VERSION_COMPUTE_SECOND_BOUNCE, + GI_PROBE_SHADER_VERSION_COMPUTE_MIPMAP, + GI_PROBE_SHADER_VERSION_WRITE_TEXTURE, + GI_PROBE_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING, + GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE, + GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_PLOT, + GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT, + GI_PROBE_SHADER_VERSION_MAX + }; + GiprobeShaderRD giprobe_shader; + RID giprobe_lighting_shader_version; + RID giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_MAX]; + RID giprobe_lighting_shader_version_pipelines[GI_PROBE_SHADER_VERSION_MAX]; + + mutable RID_Owner<GIProbeInstance> gi_probe_instance_owner; + + RS::GIProbeQuality gi_probe_quality = RS::GI_PROBE_QUALITY_HIGH; + + enum { + GI_PROBE_DEBUG_COLOR, + GI_PROBE_DEBUG_LIGHT, + GI_PROBE_DEBUG_EMISSION, + GI_PROBE_DEBUG_LIGHT_FULL, + GI_PROBE_DEBUG_MAX + }; + + struct GIProbeDebugPushConstant { + float projection[16]; + uint32_t cell_offset; + float dynamic_range; + float alpha; + uint32_t level; + int32_t bounds[3]; + uint32_t pad; + }; + + GiprobeDebugShaderRD giprobe_debug_shader; + RID giprobe_debug_shader_version; + RID giprobe_debug_shader_version_shaders[GI_PROBE_DEBUG_MAX]; + PipelineCacheRD giprobe_debug_shader_version_pipelines[GI_PROBE_DEBUG_MAX]; + RID giprobe_debug_uniform_set; + + /* SHADOW ATLAS */ + + struct ShadowShrinkStage { + RID texture; + RID filter_texture; + uint32_t size; + }; + + struct ShadowAtlas { + enum { + QUADRANT_SHIFT = 27, + SHADOW_INDEX_MASK = (1 << QUADRANT_SHIFT) - 1, + SHADOW_INVALID = 0xFFFFFFFF + }; + + struct Quadrant { + uint32_t subdivision; + + struct Shadow { + RID owner; + uint64_t version; + uint64_t fog_version; // used for fog + uint64_t alloc_tick; + + Shadow() { + version = 0; + fog_version = 0; + alloc_tick = 0; + } + }; + + Vector<Shadow> shadows; + + Quadrant() { + subdivision = 0; //not in use + } + + } quadrants[4]; + + int size_order[4] = { 0, 1, 2, 3 }; + uint32_t smallest_subdiv = 0; + + int size = 0; + bool use_16_bits = false; + + RID depth; + RID fb; //for copying + + Map<RID, uint32_t> shadow_owners; + }; + + RID_Owner<ShadowAtlas> shadow_atlas_owner; + + void _update_shadow_atlas(ShadowAtlas *shadow_atlas); + + bool _shadow_atlas_find_shadow(ShadowAtlas *shadow_atlas, int *p_in_quadrants, int p_quadrant_count, int p_current_subdiv, uint64_t p_tick, int &r_quadrant, int &r_shadow); + + RS::ShadowQuality shadows_quality = RS::SHADOW_QUALITY_MAX; //So it always updates when first set + RS::ShadowQuality directional_shadow_quality = RS::SHADOW_QUALITY_MAX; + float shadows_quality_radius = 1.0; + float directional_shadow_quality_radius = 1.0; + + float *directional_penumbra_shadow_kernel; + float *directional_soft_shadow_kernel; + float *penumbra_shadow_kernel; + float *soft_shadow_kernel; + int directional_penumbra_shadow_samples = 0; + int directional_soft_shadow_samples = 0; + int penumbra_shadow_samples = 0; + int soft_shadow_samples = 0; + + /* DIRECTIONAL SHADOW */ + + struct DirectionalShadow { + RID depth; + RID fb; //when renderign direct + + int light_count = 0; + int size = 0; + bool use_16_bits = false; + int current_light = 0; + + } directional_shadow; + + void _update_directional_shadow_atlas(); + + /* SHADOW CUBEMAPS */ + + struct ShadowCubemap { + RID cubemap; + RID side_fb[6]; + }; + + Map<int, ShadowCubemap> shadow_cubemaps; + ShadowCubemap *_get_shadow_cubemap(int p_size); + + void _create_shadow_cubemaps(); + + /* LIGHT INSTANCE */ + + struct LightInstance { + struct ShadowTransform { + CameraMatrix camera; + Transform transform; + float farplane; + float split; + float bias_scale; + float shadow_texel_size; + float range_begin; + Rect2 atlas_rect; + Vector2 uv_scale; + }; + + RS::LightType light_type = RS::LIGHT_DIRECTIONAL; + + ShadowTransform shadow_transform[6]; + + AABB aabb; + RID self; + RID light; + Transform transform; + + Vector3 light_vector; + Vector3 spot_vector; + float linear_att = 0.0; + + uint64_t shadow_pass = 0; + uint64_t last_scene_pass = 0; + uint64_t last_scene_shadow_pass = 0; + uint64_t last_pass = 0; + uint32_t light_index = 0; + uint32_t light_directional_index = 0; + + uint32_t current_shadow_atlas_key = 0; + + Vector2 dp; + + Rect2 directional_rect; + + Set<RID> shadow_atlases; //shadow atlases where this light is registered + + LightInstance() {} + }; + + mutable RID_Owner<LightInstance> light_instance_owner; + + /* ENVIRONMENT */ + + struct Environment { + // BG + RS::EnvironmentBG background = RS::ENV_BG_CLEAR_COLOR; + RID sky; + float sky_custom_fov = 0.0; + Basis sky_orientation; + Color bg_color; + float bg_energy = 1.0; + int canvas_max_layer = 0; + RS::EnvironmentAmbientSource ambient_source = RS::ENV_AMBIENT_SOURCE_BG; + Color ambient_light; + float ambient_light_energy = 1.0; + float ambient_sky_contribution = 1.0; + RS::EnvironmentReflectionSource reflection_source = RS::ENV_REFLECTION_SOURCE_BG; + Color ao_color; + + /// Tonemap + + RS::EnvironmentToneMapper tone_mapper; + float exposure = 1.0; + float white = 1.0; + bool auto_exposure = false; + float min_luminance = 0.2; + float max_luminance = 8.0; + float auto_exp_speed = 0.2; + float auto_exp_scale = 0.5; + uint64_t auto_exposure_version = 0; + + // Fog + bool fog_enabled = false; + Color fog_light_color = Color(0.5, 0.6, 0.7); + float fog_light_energy = 1.0; + float fog_sun_scatter = 0.0; + float fog_density = 0.001; + float fog_height = 0.0; + float fog_height_density = 0.0; //can be negative to invert effect + float fog_aerial_perspective = 0.0; + + /// Volumetric Fog + /// + bool volumetric_fog_enabled = false; + float volumetric_fog_density = 0.01; + Color volumetric_fog_light = Color(0, 0, 0); + float volumetric_fog_light_energy = 0.0; + float volumetric_fog_length = 64.0; + float volumetric_fog_detail_spread = 2.0; + float volumetric_fog_gi_inject = 0.0; + bool volumetric_fog_temporal_reprojection = true; + float volumetric_fog_temporal_reprojection_amount = 0.9; + + /// Glow + + bool glow_enabled = false; + Vector<float> glow_levels; + float glow_intensity = 0.8; + float glow_strength = 1.0; + float glow_bloom = 0.0; + float glow_mix = 0.01; + RS::EnvironmentGlowBlendMode glow_blend_mode = RS::ENV_GLOW_BLEND_MODE_SOFTLIGHT; + float glow_hdr_bleed_threshold = 1.0; + float glow_hdr_luminance_cap = 12.0; + float glow_hdr_bleed_scale = 2.0; + + /// SSAO + + bool ssao_enabled = false; + float ssao_radius = 1.0; + float ssao_intensity = 2.0; + float ssao_power = 1.5; + float ssao_detail = 0.5; + float ssao_horizon = 0.06; + float ssao_sharpness = 0.98; + float ssao_direct_light_affect = 0.0; + float ssao_ao_channel_affect = 0.0; + + /// SSR + /// + bool ssr_enabled = false; + int ssr_max_steps = 64; + float ssr_fade_in = 0.15; + float ssr_fade_out = 2.0; + float ssr_depth_tolerance = 0.2; + + /// SDFGI + bool sdfgi_enabled = false; + RS::EnvironmentSDFGICascades sdfgi_cascades; + float sdfgi_min_cell_size = 0.2; + bool sdfgi_use_occlusion = false; + float sdfgi_bounce_feedback = 0.0; + bool sdfgi_read_sky_light = false; + float sdfgi_energy = 1.0; + float sdfgi_normal_bias = 1.1; + float sdfgi_probe_bias = 1.1; + RS::EnvironmentSDFGIYScale sdfgi_y_scale = RS::ENV_SDFGI_Y_SCALE_DISABLED; + + /// Adjustments + + bool adjustments_enabled = false; + float adjustments_brightness = 1.0f; + float adjustments_contrast = 1.0f; + float adjustments_saturation = 1.0f; + bool use_1d_color_correction = false; + RID color_correction = RID(); + }; + + RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; + bool ssao_half_size = false; + bool ssao_using_half_size = false; + float ssao_adaptive_target = 0.5; + int ssao_blur_passes = 2; + float ssao_fadeout_from = 50.0; + float ssao_fadeout_to = 300.0; + + bool glow_bicubic_upscale = false; + bool glow_high_quality = false; + RS::EnvironmentSSRRoughnessQuality ssr_roughness_quality = RS::ENV_SSR_ROUGNESS_QUALITY_LOW; + + static uint64_t auto_exposure_counter; + + mutable RID_Owner<Environment, true> environment_owner; + + /* CAMERA EFFECTS */ + + struct CameraEffects { + bool dof_blur_far_enabled = false; + float dof_blur_far_distance = 10; + float dof_blur_far_transition = 5; + + bool dof_blur_near_enabled = false; + float dof_blur_near_distance = 2; + float dof_blur_near_transition = 1; + + float dof_blur_amount = 0.1; + + bool override_exposure_enabled = false; + float override_exposure = 1; + }; + + RS::DOFBlurQuality dof_blur_quality = RS::DOF_BLUR_QUALITY_MEDIUM; + RS::DOFBokehShape dof_blur_bokeh_shape = RS::DOF_BOKEH_HEXAGON; + bool dof_blur_use_jitter = false; + RS::SubSurfaceScatteringQuality sss_quality = RS::SUB_SURFACE_SCATTERING_QUALITY_MEDIUM; + float sss_scale = 0.05; + float sss_depth_scale = 0.01; + + mutable RID_Owner<CameraEffects, true> camera_effects_owner; + + /* RENDER BUFFERS */ + + ClusterBuilderSharedDataRD cluster_builder_shared; + ClusterBuilderRD *current_cluster_builder = nullptr; + + struct SDFGI; + struct VolumetricFog; + + struct RenderBuffers { + enum { + MAX_GIPROBES = 8 + }; + + RenderBufferData *data = nullptr; + int width = 0, height = 0; + RS::ViewportMSAA msaa = RS::VIEWPORT_MSAA_DISABLED; + RS::ViewportScreenSpaceAA screen_space_aa = RS::VIEWPORT_SCREEN_SPACE_AA_DISABLED; + bool use_debanding = false; + + RID render_target; + + uint64_t auto_exposure_version = 1; + + RID texture; //main texture for rendering to, must be filled after done rendering + RID depth_texture; //main depth texture + + RID gi_uniform_set; + SDFGI *sdfgi = nullptr; + VolumetricFog *volumetric_fog = nullptr; + + ClusterBuilderRD *cluster_builder = nullptr; + + //built-in textures used for ping pong image processing and blurring + struct Blur { + RID texture; + + struct Mipmap { + RID texture; + int width; + int height; + }; + + Vector<Mipmap> mipmaps; + }; + + Blur blur[2]; //the second one starts from the first mipmap + + struct Luminance { + Vector<RID> reduce; + RID current; + } luminance; + + struct SSAO { + RID depth; + Vector<RID> depth_slices; + RID ao_deinterleaved; + Vector<RID> ao_deinterleaved_slices; + RID ao_pong; + Vector<RID> ao_pong_slices; + RID ao_final; + RID importance_map[2]; + } ssao; + + struct SSR { + RID normal_scaled; + RID depth_scaled; + RID blur_radius[2]; + } ssr; + + RID giprobe_textures[MAX_GIPROBES]; + RID giprobe_buffer; + + RID ambient_buffer; + RID reflection_buffer; + bool using_half_size_gi = false; + + struct GI { + RID full_buffer; + RID full_dispatch; + RID full_mask; + } gi; + }; + + RID default_giprobe_buffer; + + /* SDFGI */ + + struct SDFGI { + enum { + MAX_CASCADES = 8, + CASCADE_SIZE = 128, + PROBE_DIVISOR = 16, + ANISOTROPY_SIZE = 6, + MAX_DYNAMIC_LIGHTS = 128, + MAX_STATIC_LIGHTS = 1024, + LIGHTPROBE_OCT_SIZE = 6, + SH_SIZE = 16 + }; + + struct Cascade { + struct UBO { + float offset[3]; + float to_cell; + int32_t probe_offset[3]; + uint32_t pad; + }; + + //cascade blocks are full-size for volume (128^3), half size for albedo/emission + RID sdf_tex; + RID light_tex; + RID light_aniso_0_tex; + RID light_aniso_1_tex; + + RID light_data; + RID light_aniso_0_data; + RID light_aniso_1_data; + + struct SolidCell { // this struct is unused, but remains as reference for size + uint32_t position; + uint32_t albedo; + uint32_t static_light; + uint32_t static_light_aniso; + }; + + RID solid_cell_dispatch_buffer; //buffer for indirect compute dispatch + RID solid_cell_buffer; + + RID lightprobe_history_tex; + RID lightprobe_average_tex; + + float cell_size; + Vector3i position; + + static const Vector3i DIRTY_ALL; + Vector3i dirty_regions; //(0,0,0 is not dirty, negative is refresh from the end, DIRTY_ALL is refresh all. + + RID sdf_store_uniform_set; + RID sdf_direct_light_uniform_set; + RID scroll_uniform_set; + RID scroll_occlusion_uniform_set; + RID integrate_uniform_set; + RID lights_buffer; + + bool all_dynamic_lights_dirty = true; + }; + + //used for rendering (voxelization) + RID render_albedo; + RID render_emission; + RID render_emission_aniso; + RID render_occlusion[8]; + RID render_geom_facing; + + RID render_sdf[2]; + RID render_sdf_half[2]; + + //used for ping pong processing in cascades + RID sdf_initialize_uniform_set; + RID sdf_initialize_half_uniform_set; + RID jump_flood_uniform_set[2]; + RID jump_flood_half_uniform_set[2]; + RID sdf_upscale_uniform_set; + int upscale_jfa_uniform_set_index; + RID occlusion_uniform_set; + + uint32_t cascade_size = 128; + + LocalVector<Cascade> cascades; + + RID lightprobe_texture; + RID lightprobe_data; + RID occlusion_texture; + RID occlusion_data; + RID ambient_texture; //integrates with volumetric fog + + RID lightprobe_history_scroll; //used for scrolling lightprobes + RID lightprobe_average_scroll; //used for scrolling lightprobes + + uint32_t history_size = 0; + float solid_cell_ratio = 0; + uint32_t solid_cell_count = 0; + + RS::EnvironmentSDFGICascades cascade_mode; + float min_cell_size = 0; + uint32_t probe_axis_count = 0; //amount of probes per axis, this is an odd number because it encloses endpoints + + RID debug_uniform_set; + RID debug_probes_uniform_set; + RID cascades_ubo; + + bool uses_occlusion = false; + float bounce_feedback = 0.0; + bool reads_sky = false; + float energy = 1.0; + float normal_bias = 1.1; + float probe_bias = 1.1; + RS::EnvironmentSDFGIYScale y_scale_mode = RS::ENV_SDFGI_Y_SCALE_DISABLED; + + float y_mult = 1.0; + + uint32_t render_pass = 0; + + int32_t cascade_dynamic_light_count[SDFGI::MAX_CASCADES]; //used dynamically + }; + + void _sdfgi_update_light(RID p_render_buffers, RID p_environment); + void _sdfgi_update_probes(RID p_render_buffers, RID p_environment); + void _sdfgi_store_probes(RID p_render_buffers); + + RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; + RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_10_FRAMES; + RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES; + + float sdfgi_solid_cell_ratio = 0.25; + Vector3 sdfgi_debug_probe_pos; + Vector3 sdfgi_debug_probe_dir; + bool sdfgi_debug_probe_enabled = false; + Vector3i sdfgi_debug_probe_index; + + struct SDGIShader { + enum SDFGIPreprocessShaderVersion { + PRE_PROCESS_SCROLL, + PRE_PROCESS_SCROLL_OCCLUSION, + PRE_PROCESS_JUMP_FLOOD_INITIALIZE, + PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF, + PRE_PROCESS_JUMP_FLOOD, + PRE_PROCESS_JUMP_FLOOD_OPTIMIZED, + PRE_PROCESS_JUMP_FLOOD_UPSCALE, + PRE_PROCESS_OCCLUSION, + PRE_PROCESS_STORE, + PRE_PROCESS_MAX + }; + + struct PreprocessPushConstant { + int32_t scroll[3]; + int32_t grid_size; + + int32_t probe_offset[3]; + int32_t step_size; + + int32_t half_size; + uint32_t occlusion_index; + int32_t cascade; + uint32_t pad; + }; + + SdfgiPreprocessShaderRD preprocess; + RID preprocess_shader; + RID preprocess_pipeline[PRE_PROCESS_MAX]; + + struct DebugPushConstant { + float grid_size[3]; + uint32_t max_cascades; + + int32_t screen_size[2]; + uint32_t use_occlusion; + float y_mult; + + float cam_extent[3]; + uint32_t probe_axis_size; + + float cam_transform[16]; + }; + + SdfgiDebugShaderRD debug; + RID debug_shader; + RID debug_shader_version; + RID debug_pipeline; + + enum ProbeDebugMode { + PROBE_DEBUG_PROBES, + PROBE_DEBUG_VISIBILITY, + PROBE_DEBUG_MAX + }; + + struct DebugProbesPushConstant { + float projection[16]; + + uint32_t band_power; + uint32_t sections_in_band; + uint32_t band_mask; + float section_arc; + + float grid_size[3]; + uint32_t cascade; + + uint32_t pad; + float y_mult; + int32_t probe_debug_index; + int32_t probe_axis_size; + }; + + SdfgiDebugProbesShaderRD debug_probes; + RID debug_probes_shader; + RID debug_probes_shader_version; + + PipelineCacheRD debug_probes_pipeline[PROBE_DEBUG_MAX]; + + struct Light { + float color[3]; + float energy; + + float direction[3]; + uint32_t has_shadow; + + float position[3]; + float attenuation; + + uint32_t type; + float cos_spot_angle; + float inv_spot_attenuation; + float radius; + + float shadow_color[4]; + }; + + struct DirectLightPushConstant { + float grid_size[3]; + uint32_t max_cascades; + + uint32_t cascade; + uint32_t light_count; + uint32_t process_offset; + uint32_t process_increment; + + int32_t probe_axis_size; + float bounce_feedback; + float y_mult; + uint32_t use_occlusion; + }; + + enum { + DIRECT_LIGHT_MODE_STATIC, + DIRECT_LIGHT_MODE_DYNAMIC, + DIRECT_LIGHT_MODE_MAX + }; + SdfgiDirectLightShaderRD direct_light; + RID direct_light_shader; + RID direct_light_pipeline[DIRECT_LIGHT_MODE_MAX]; + + enum { + INTEGRATE_MODE_PROCESS, + INTEGRATE_MODE_STORE, + INTEGRATE_MODE_SCROLL, + INTEGRATE_MODE_SCROLL_STORE, + INTEGRATE_MODE_MAX + }; + struct IntegratePushConstant { + enum { + SKY_MODE_DISABLED, + SKY_MODE_COLOR, + SKY_MODE_SKY, + }; + + float grid_size[3]; + uint32_t max_cascades; + + uint32_t probe_axis_size; + uint32_t cascade; + uint32_t history_index; + uint32_t history_size; + + uint32_t ray_count; + float ray_bias; + int32_t image_size[2]; + + int32_t world_offset[3]; + uint32_t sky_mode; + + int32_t scroll[3]; + float sky_energy; + + float sky_color[3]; + float y_mult; + + uint32_t store_ambient_texture; + uint32_t pad[3]; + }; + + SdfgiIntegrateShaderRD integrate; + RID integrate_shader; + RID integrate_pipeline[INTEGRATE_MODE_MAX]; + + RID integrate_default_sky_uniform_set; + + } sdfgi_shader; + + void _sdfgi_erase(RenderBuffers *rb); + int _sdfgi_get_pending_region_data(RID p_render_buffers, int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const; + void _sdfgi_update_cascades(RID p_render_buffers); + + /* GI */ + + struct GI { + struct SDFGIData { + float grid_size[3]; + uint32_t max_cascades; + + uint32_t use_occlusion; + int32_t probe_axis_size; + float probe_to_uvw; + float normal_bias; + + float lightprobe_tex_pixel_size[3]; + float energy; + + float lightprobe_uv_offset[3]; + float y_mult; + + float occlusion_clamp[3]; + uint32_t pad3; + + float occlusion_renormalize[3]; + uint32_t pad4; + + float cascade_probe_size[3]; + uint32_t pad5; + + struct ProbeCascadeData { + float position[3]; //offset of (0,0,0) in world coordinates + float to_probe; // 1/bounds * grid_size + int32_t probe_world_offset[3]; + float to_cell; // 1/bounds * grid_size + }; + + ProbeCascadeData cascades[SDFGI::MAX_CASCADES]; + }; + + struct GIProbeData { + float xform[16]; + float bounds[3]; + float dynamic_range; + + float bias; + float normal_bias; + uint32_t blend_ambient; + uint32_t texture_slot; + + float anisotropy_strength; + float ao; + float ao_size; + uint32_t mipmaps; + }; + + struct PushConstant { + int32_t screen_size[2]; + float z_near; + float z_far; + + float proj_info[4]; + float ao_color[3]; + uint32_t max_giprobes; + + uint32_t high_quality_vct; + uint32_t orthogonal; + uint32_t pad[2]; + + float cam_rotation[12]; + }; + + RID sdfgi_ubo; + enum Mode { + MODE_GIPROBE, + MODE_SDFGI, + MODE_COMBINED, + MODE_HALF_RES_GIPROBE, + MODE_HALF_RES_SDFGI, + MODE_HALF_RES_COMBINED, + MODE_MAX + }; + + bool half_resolution = false; + GiShaderRD shader; + RID shader_version; + RID pipelines[MODE_MAX]; + } gi; + + bool screen_space_roughness_limiter = false; + float screen_space_roughness_limiter_amount = 0.25; + float screen_space_roughness_limiter_limit = 0.18; + + mutable RID_Owner<RenderBuffers> render_buffers_owner; + + void _free_render_buffer_data(RenderBuffers *rb); + void _allocate_blur_textures(RenderBuffers *rb); + void _allocate_luminance_textures(RenderBuffers *rb); + + void _render_buffers_debug_draw(RID p_render_buffers, RID p_shadow_atlas); + void _render_buffers_post_process_and_tonemap(RID p_render_buffers, RID p_environment, RID p_camera_effects, const CameraMatrix &p_projection); + void _sdfgi_debug_draw(RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform); + + /* Cluster */ + + struct Cluster { + /* Scene State UBO */ + + enum { + REFLECTION_AMBIENT_DISABLED = 0, + REFLECTION_AMBIENT_ENVIRONMENT = 1, + REFLECTION_AMBIENT_COLOR = 2, + }; + + struct ReflectionData { + float box_extents[3]; + float index; + float box_offset[3]; + uint32_t mask; + float ambient[3]; // ambient color, + float intensity; + bool exterior; + bool box_project; + uint32_t ambient_mode; + uint32_t pad; + float local_matrix[16]; // up to here for spot and omni, rest is for directional + }; + + struct LightData { + float position[3]; + float inv_radius; + float direction[3]; + float size; + + float color[3]; + float attenuation; + + float inv_spot_attenuation; + float cos_spot_angle; + float specular_amount; + uint32_t shadow_enabled; + + float atlas_rect[4]; // in omni, used for atlas uv, in spot, used for projector uv + float shadow_matrix[16]; + float shadow_bias; + float shadow_normal_bias; + float transmittance_bias; + float soft_shadow_size; + float soft_shadow_scale; + uint32_t mask; + float shadow_volumetric_fog_fade; + uint32_t pad; + float projector_rect[4]; + }; + + struct DirectionalLightData { + float direction[3]; + float energy; + float color[3]; + float size; + float specular; + uint32_t mask; + float softshadow_angle; + float soft_shadow_scale; + uint32_t blend_splits; + uint32_t shadow_enabled; + float fade_from; + float fade_to; + uint32_t pad[3]; + float shadow_volumetric_fog_fade; + float shadow_bias[4]; + float shadow_normal_bias[4]; + float shadow_transmittance_bias[4]; + float shadow_z_range[4]; + float shadow_range_begin[4]; + float shadow_split_offsets[4]; + float shadow_matrices[4][16]; + float shadow_color1[4]; + float shadow_color2[4]; + float shadow_color3[4]; + float shadow_color4[4]; + float uv_scale1[2]; + float uv_scale2[2]; + float uv_scale3[2]; + float uv_scale4[2]; + }; + + struct DecalData { + float xform[16]; + float inv_extents[3]; + float albedo_mix; + float albedo_rect[4]; + float normal_rect[4]; + float orm_rect[4]; + float emission_rect[4]; + float modulate[4]; + float emission_energy; + uint32_t mask; + float upper_fade; + float lower_fade; + float normal_xform[12]; + float normal[3]; + float normal_fade; + }; + + template <class T> + struct InstanceSort { + float depth; + T *instance; + bool operator<(const InstanceSort &p_sort) const { + return depth < p_sort.depth; + } + }; + + ReflectionData *reflections; + InstanceSort<ReflectionProbeInstance> *reflection_sort; + uint32_t max_reflections; + RID reflection_buffer; + uint32_t max_reflection_probes_per_instance; + uint32_t reflection_count = 0; + + DecalData *decals; + InstanceSort<DecalInstance> *decal_sort; + uint32_t max_decals; + RID decal_buffer; + uint32_t decal_count; + + LightData *omni_lights; + LightData *spot_lights; + + InstanceSort<LightInstance> *omni_light_sort; + InstanceSort<LightInstance> *spot_light_sort; + uint32_t max_lights; + RID omni_light_buffer; + RID spot_light_buffer; + uint32_t omni_light_count = 0; + uint32_t spot_light_count = 0; + + DirectionalLightData *directional_lights; + uint32_t max_directional_lights; + RID directional_light_buffer; + + } cluster; + + struct RenderState { + RID render_buffers; + Transform cam_transform; + CameraMatrix cam_projection; + bool cam_ortogonal = false; + const PagedArray<GeometryInstance *> *instances = nullptr; + const PagedArray<RID> *lights = nullptr; + const PagedArray<RID> *reflection_probes = nullptr; + const PagedArray<RID> *gi_probes = nullptr; + const PagedArray<RID> *decals = nullptr; + const PagedArray<RID> *lightmaps = nullptr; + RID environment; + RID camera_effects; + RID shadow_atlas; + RID reflection_atlas; + RID reflection_probe; + int reflection_probe_pass = 0; + float screen_lod_threshold = 0.0; + + const RenderShadowData *render_shadows = nullptr; + int render_shadow_count = 0; + const RenderSDFGIData *render_sdfgi_regions = nullptr; + int render_sdfgi_region_count = 0; + const RenderSDFGIUpdateData *sdfgi_update_data = nullptr; + + uint32_t directional_light_count = 0; + uint32_t gi_probe_count = 0; + + LocalVector<int> cube_shadows; + LocalVector<int> shadows; + LocalVector<int> directional_shadows; + + bool depth_prepass_used; + } render_state; + + struct VolumetricFog { + enum { + MAX_TEMPORAL_FRAMES = 16 + }; + + uint32_t width = 0; + uint32_t height = 0; + uint32_t depth = 0; + + float length; + float spread; + + RID light_density_map; + RID prev_light_density_map; + + RID fog_map; + RID uniform_set; + RID uniform_set2; + RID sdfgi_uniform_set; + RID sky_uniform_set; + + int last_shadow_filter = -1; + + Transform prev_cam_transform; + }; + + enum { + VOLUMETRIC_FOG_SHADER_DENSITY, + VOLUMETRIC_FOG_SHADER_DENSITY_WITH_SDFGI, + VOLUMETRIC_FOG_SHADER_FILTER, + VOLUMETRIC_FOG_SHADER_FOG, + VOLUMETRIC_FOG_SHADER_MAX, + }; + + struct VolumetricFogShader { + struct ParamsUBO { + float fog_frustum_size_begin[2]; + float fog_frustum_size_end[2]; + + float fog_frustum_end; + float z_near; + float z_far; + uint32_t filter_axis; + + int32_t fog_volume_size[3]; + uint32_t directional_light_count; + + float light_energy[3]; + float base_density; + + float detail_spread; + float gi_inject; + uint32_t max_gi_probes; + uint32_t cluster_type_size; + + float screen_size[2]; + uint32_t cluster_shift; + uint32_t cluster_width; + + uint32_t max_cluster_element_count_div_32; + uint32_t use_temporal_reprojection; + uint32_t temporal_frame; + float temporal_blend; + + float cam_rotation[12]; + float to_prev_view[16]; + }; + + VolumetricFogShaderRD shader; + + RID params_ubo; + RID shader_version; + RID pipelines[VOLUMETRIC_FOG_SHADER_MAX]; + + } volumetric_fog; + + uint32_t volumetric_fog_depth = 128; + uint32_t volumetric_fog_size = 128; + bool volumetric_fog_filter_active = true; + + void _volumetric_fog_erase(RenderBuffers *rb); + void _update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_gi_probe_count); + + RID shadow_sampler; + + uint64_t scene_pass = 0; + uint64_t shadow_atlas_realloc_tolerance_msec = 500; + + struct SDFGICosineNeighbour { + uint32_t neighbour; + float weight; + }; + + uint32_t max_cluster_elements = 512; + bool low_end = false; + + void _render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0, bool p_open_pass = true, bool p_close_pass = true, bool p_clear_region = true); + void _render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances); + void _render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result); + +public: + virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance) = 0; + virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance) = 0; + + /* SHADOW ATLAS API */ + + RID shadow_atlas_create(); + void shadow_atlas_set_size(RID p_atlas, int p_size, bool p_16_bits = false); + void shadow_atlas_set_quadrant_subdivision(RID p_atlas, int p_quadrant, int p_subdivision); + bool shadow_atlas_update_light(RID p_atlas, RID p_light_intance, float p_coverage, uint64_t p_light_version); + _FORCE_INLINE_ bool shadow_atlas_owns_light_instance(RID p_atlas, RID p_light_intance) { + ShadowAtlas *atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND_V(!atlas, false); + return atlas->shadow_owners.has(p_light_intance); + } + + _FORCE_INLINE_ RID shadow_atlas_get_texture(RID p_atlas) { + ShadowAtlas *atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND_V(!atlas, RID()); + return atlas->depth; + } + + _FORCE_INLINE_ Size2i shadow_atlas_get_size(RID p_atlas) { + ShadowAtlas *atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND_V(!atlas, Size2i()); + return Size2(atlas->size, atlas->size); + } + + void directional_shadow_atlas_set_size(int p_size, bool p_16_bits = false); + int get_directional_light_shadow_size(RID p_light_intance); + void set_directional_shadow_count(int p_count); + + _FORCE_INLINE_ RID directional_shadow_get_texture() { + return directional_shadow.depth; + } + + _FORCE_INLINE_ Size2i directional_shadow_get_size() { + return Size2i(directional_shadow.size, directional_shadow.size); + } + + /* SDFGI UPDATE */ + + int sdfgi_get_lightprobe_octahedron_size() const { return SDFGI::LIGHTPROBE_OCT_SIZE; } + virtual void sdfgi_update(RID p_render_buffers, RID p_environment, const Vector3 &p_world_position); + virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const; + virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const; + virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const; + RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; } + /* SKY API */ + + virtual RID sky_allocate(); + virtual void sky_initialize(RID p_rid); + + void sky_set_radiance_size(RID p_sky, int p_radiance_size); + void sky_set_mode(RID p_sky, RS::SkyMode p_mode); + void sky_set_material(RID p_sky, RID p_material); + Ref<Image> sky_bake_panorama(RID p_sky, float p_energy, bool p_bake_irradiance, const Size2i &p_size); + + RID sky_get_radiance_texture_rd(RID p_sky) const; + RID sky_get_radiance_uniform_set_rd(RID p_sky, RID p_shader, int p_set) const; + RID sky_get_material(RID p_sky) const; + + /* ENVIRONMENT API */ + + virtual RID environment_allocate(); + virtual void environment_initialize(RID p_rid); + + void environment_set_background(RID p_env, RS::EnvironmentBG p_bg); + void environment_set_sky(RID p_env, RID p_sky); + void environment_set_sky_custom_fov(RID p_env, float p_scale); + void environment_set_sky_orientation(RID p_env, const Basis &p_orientation); + void environment_set_bg_color(RID p_env, const Color &p_color); + void environment_set_bg_energy(RID p_env, float p_energy); + void environment_set_canvas_max_layer(RID p_env, int p_max_layer); + void environment_set_ambient_light(RID p_env, const Color &p_color, RS::EnvironmentAmbientSource p_ambient = RS::ENV_AMBIENT_SOURCE_BG, float p_energy = 1.0, float p_sky_contribution = 0.0, RS::EnvironmentReflectionSource p_reflection_source = RS::ENV_REFLECTION_SOURCE_BG, const Color &p_ao_color = Color()); + + RS::EnvironmentBG environment_get_background(RID p_env) const; + RID environment_get_sky(RID p_env) const; + float environment_get_sky_custom_fov(RID p_env) const; + Basis environment_get_sky_orientation(RID p_env) const; + Color environment_get_bg_color(RID p_env) const; + float environment_get_bg_energy(RID p_env) const; + int environment_get_canvas_max_layer(RID p_env) const; + Color environment_get_ambient_light_color(RID p_env) const; + RS::EnvironmentAmbientSource environment_get_ambient_source(RID p_env) const; + float environment_get_ambient_light_energy(RID p_env) const; + float environment_get_ambient_sky_contribution(RID p_env) const; + RS::EnvironmentReflectionSource environment_get_reflection_source(RID p_env) const; + Color environment_get_ao_color(RID p_env) const; + + bool is_environment(RID p_env) const; + + void environment_set_glow(RID p_env, bool p_enable, Vector<float> p_levels, float p_intensity, float p_strength, float p_mix, float p_bloom_threshold, RS::EnvironmentGlowBlendMode p_blend_mode, float p_hdr_bleed_threshold, float p_hdr_bleed_scale, float p_hdr_luminance_cap); + void environment_glow_set_use_bicubic_upscale(bool p_enable); + void environment_glow_set_use_high_quality(bool p_enable); + + void environment_set_fog(RID p_env, bool p_enable, const Color &p_light_color, float p_light_energy, float p_sun_scatter, float p_density, float p_height, float p_height_density, float p_aerial_perspective); + bool environment_is_fog_enabled(RID p_env) const; + Color environment_get_fog_light_color(RID p_env) const; + float environment_get_fog_light_energy(RID p_env) const; + float environment_get_fog_sun_scatter(RID p_env) const; + float environment_get_fog_density(RID p_env) const; + float environment_get_fog_height(RID p_env) const; + float environment_get_fog_height_density(RID p_env) const; + float environment_get_fog_aerial_perspective(RID p_env) const; + + void environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, const Color &p_light, float p_light_energy, float p_length, float p_detail_spread, float p_gi_inject, bool p_temporal_reprojection, float p_temporal_reprojection_amount); + + virtual void environment_set_volumetric_fog_volume_size(int p_size, int p_depth); + virtual void environment_set_volumetric_fog_filter_active(bool p_enable); + + void environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance); + void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_power, float p_detail, float p_horizon, float p_sharpness, float p_light_affect, float p_ao_channel_affect); + void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size, float p_adaptive_target, int p_blur_passes, float p_fadeout_from, float p_fadeout_to); + bool environment_is_ssao_enabled(RID p_env) const; + float environment_get_ssao_ao_affect(RID p_env) const; + float environment_get_ssao_light_affect(RID p_env) const; + bool environment_is_ssr_enabled(RID p_env) const; + bool environment_is_sdfgi_enabled(RID p_env) const; + + virtual void environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, float p_bounce_feedback, bool p_read_sky, float p_energy, float p_normal_bias, float p_probe_bias); + virtual void environment_set_sdfgi_ray_count(RS::EnvironmentSDFGIRayCount p_ray_count); + virtual void environment_set_sdfgi_frames_to_converge(RS::EnvironmentSDFGIFramesToConverge p_frames); + virtual void environment_set_sdfgi_frames_to_update_light(RS::EnvironmentSDFGIFramesToUpdateLight p_update); + + void environment_set_ssr_roughness_quality(RS::EnvironmentSSRRoughnessQuality p_quality); + RS::EnvironmentSSRRoughnessQuality environment_get_ssr_roughness_quality() const; + + void environment_set_tonemap(RID p_env, RS::EnvironmentToneMapper p_tone_mapper, float p_exposure, float p_white, bool p_auto_exposure, float p_min_luminance, float p_max_luminance, float p_auto_exp_speed, float p_auto_exp_scale); + void environment_set_adjustment(RID p_env, bool p_enable, float p_brightness, float p_contrast, float p_saturation, bool p_use_1d_color_correction, RID p_color_correction); + + virtual Ref<Image> environment_bake_panorama(RID p_env, bool p_bake_irradiance, const Size2i &p_size); + + virtual RID camera_effects_allocate(); + virtual void camera_effects_initialize(RID p_rid); + + virtual void camera_effects_set_dof_blur_quality(RS::DOFBlurQuality p_quality, bool p_use_jitter); + virtual void camera_effects_set_dof_blur_bokeh_shape(RS::DOFBokehShape p_shape); + + virtual void camera_effects_set_dof_blur(RID p_camera_effects, bool p_far_enable, float p_far_distance, float p_far_transition, bool p_near_enable, float p_near_distance, float p_near_transition, float p_amount); + virtual void camera_effects_set_custom_exposure(RID p_camera_effects, bool p_enable, float p_exposure); + + RID light_instance_create(RID p_light); + void light_instance_set_transform(RID p_light_instance, const Transform &p_transform); + void light_instance_set_aabb(RID p_light_instance, const AABB &p_aabb); + void light_instance_set_shadow_transform(RID p_light_instance, const CameraMatrix &p_projection, const Transform &p_transform, float p_far, float p_split, int p_pass, float p_shadow_texel_size, float p_bias_scale = 1.0, float p_range_begin = 0, const Vector2 &p_uv_scale = Vector2()); + void light_instance_mark_visible(RID p_light_instance); + + _FORCE_INLINE_ RID light_instance_get_base_light(RID p_light_instance) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->light; + } + + _FORCE_INLINE_ Transform light_instance_get_base_transform(RID p_light_instance) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->transform; + } + + _FORCE_INLINE_ Rect2 light_instance_get_shadow_atlas_rect(RID p_light_instance, RID p_shadow_atlas) { + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + LightInstance *li = light_instance_owner.getornull(p_light_instance); + uint32_t key = shadow_atlas->shadow_owners[li->self]; + + uint32_t quadrant = (key >> ShadowAtlas::QUADRANT_SHIFT) & 0x3; + uint32_t shadow = key & ShadowAtlas::SHADOW_INDEX_MASK; + + ERR_FAIL_COND_V(shadow >= (uint32_t)shadow_atlas->quadrants[quadrant].shadows.size(), Rect2()); + + uint32_t atlas_size = shadow_atlas->size; + uint32_t quadrant_size = atlas_size >> 1; + + uint32_t x = (quadrant & 1) * quadrant_size; + uint32_t y = (quadrant >> 1) * quadrant_size; + + uint32_t shadow_size = (quadrant_size / shadow_atlas->quadrants[quadrant].subdivision); + x += (shadow % shadow_atlas->quadrants[quadrant].subdivision) * shadow_size; + y += (shadow / shadow_atlas->quadrants[quadrant].subdivision) * shadow_size; + + uint32_t width = shadow_size; + uint32_t height = shadow_size; + + return Rect2(x / float(shadow_atlas->size), y / float(shadow_atlas->size), width / float(shadow_atlas->size), height / float(shadow_atlas->size)); + } + + _FORCE_INLINE_ CameraMatrix light_instance_get_shadow_camera(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].camera; + } + + _FORCE_INLINE_ float light_instance_get_shadow_texel_size(RID p_light_instance, RID p_shadow_atlas) { +#ifdef DEBUG_ENABLED + LightInstance *li = light_instance_owner.getornull(p_light_instance); + ERR_FAIL_COND_V(!li->shadow_atlases.has(p_shadow_atlas), 0); +#endif + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); + ERR_FAIL_COND_V(!shadow_atlas, 0); +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_V(!shadow_atlas->shadow_owners.has(p_light_instance), 0); +#endif + uint32_t key = shadow_atlas->shadow_owners[p_light_instance]; + + uint32_t quadrant = (key >> ShadowAtlas::QUADRANT_SHIFT) & 0x3; + + uint32_t quadrant_size = shadow_atlas->size >> 1; + + uint32_t shadow_size = (quadrant_size / shadow_atlas->quadrants[quadrant].subdivision); + + return float(1.0) / shadow_size; + } + + _FORCE_INLINE_ Transform + light_instance_get_shadow_transform(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].transform; + } + _FORCE_INLINE_ float light_instance_get_shadow_bias_scale(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].bias_scale; + } + _FORCE_INLINE_ float light_instance_get_shadow_range(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].farplane; + } + _FORCE_INLINE_ float light_instance_get_shadow_range_begin(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].range_begin; + } + + _FORCE_INLINE_ Vector2 light_instance_get_shadow_uv_scale(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].uv_scale; + } + + _FORCE_INLINE_ Rect2 light_instance_get_directional_shadow_atlas_rect(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].atlas_rect; + } + + _FORCE_INLINE_ float light_instance_get_directional_shadow_split(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].split; + } + + _FORCE_INLINE_ float light_instance_get_directional_shadow_texel_size(RID p_light_instance, int p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->shadow_transform[p_index].shadow_texel_size; + } + + _FORCE_INLINE_ void light_instance_set_render_pass(RID p_light_instance, uint64_t p_pass) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + li->last_pass = p_pass; + } + + _FORCE_INLINE_ uint64_t light_instance_get_render_pass(RID p_light_instance) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->last_pass; + } + + _FORCE_INLINE_ void light_instance_set_index(RID p_light_instance, uint32_t p_index) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + li->light_index = p_index; + } + + _FORCE_INLINE_ uint32_t light_instance_get_index(RID p_light_instance) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->light_index; + } + + _FORCE_INLINE_ RS::LightType light_instance_get_type(RID p_light_instance) { + LightInstance *li = light_instance_owner.getornull(p_light_instance); + return li->light_type; + } + + virtual RID reflection_atlas_create(); + virtual void reflection_atlas_set_size(RID p_ref_atlas, int p_reflection_size, int p_reflection_count); + virtual int reflection_atlas_get_size(RID p_ref_atlas) const; + + _FORCE_INLINE_ RID reflection_atlas_get_texture(RID p_ref_atlas) { + ReflectionAtlas *atlas = reflection_atlas_owner.getornull(p_ref_atlas); + ERR_FAIL_COND_V(!atlas, RID()); + return atlas->reflection; + } + + virtual RID reflection_probe_instance_create(RID p_probe); + virtual void reflection_probe_instance_set_transform(RID p_instance, const Transform &p_transform); + virtual void reflection_probe_release_atlas_index(RID p_instance); + virtual bool reflection_probe_instance_needs_redraw(RID p_instance); + virtual bool reflection_probe_instance_has_reflection(RID p_instance); + virtual bool reflection_probe_instance_begin_render(RID p_instance, RID p_reflection_atlas); + virtual bool reflection_probe_instance_postprocess_step(RID p_instance); + + uint32_t reflection_probe_instance_get_resolution(RID p_instance); + RID reflection_probe_instance_get_framebuffer(RID p_instance, int p_index); + RID reflection_probe_instance_get_depth_framebuffer(RID p_instance, int p_index); + + _FORCE_INLINE_ RID reflection_probe_instance_get_probe(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, RID()); + + return rpi->probe; + } + + _FORCE_INLINE_ void reflection_probe_instance_set_render_index(RID p_instance, uint32_t p_render_index) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND(!rpi); + rpi->render_index = p_render_index; + } + + _FORCE_INLINE_ uint32_t reflection_probe_instance_get_render_index(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, 0); + + return rpi->render_index; + } + + _FORCE_INLINE_ void reflection_probe_instance_set_render_pass(RID p_instance, uint32_t p_render_pass) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND(!rpi); + rpi->last_pass = p_render_pass; + } + + _FORCE_INLINE_ uint32_t reflection_probe_instance_get_render_pass(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, 0); + + return rpi->last_pass; + } + + _FORCE_INLINE_ Transform reflection_probe_instance_get_transform(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, Transform()); + + return rpi->transform; + } + + _FORCE_INLINE_ int reflection_probe_instance_get_atlas_index(RID p_instance) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!rpi, -1); + + return rpi->atlas_index; + } + + virtual RID decal_instance_create(RID p_decal); + virtual void decal_instance_set_transform(RID p_decal, const Transform &p_transform); + + _FORCE_INLINE_ RID decal_instance_get_base(RID p_decal) const { + DecalInstance *decal = decal_instance_owner.getornull(p_decal); + return decal->decal; + } + + _FORCE_INLINE_ Transform decal_instance_get_transform(RID p_decal) const { + DecalInstance *decal = decal_instance_owner.getornull(p_decal); + return decal->transform; + } + + virtual RID lightmap_instance_create(RID p_lightmap); + virtual void lightmap_instance_set_transform(RID p_lightmap, const Transform &p_transform); + _FORCE_INLINE_ bool lightmap_instance_is_valid(RID p_lightmap_instance) { + return lightmap_instance_owner.getornull(p_lightmap_instance) != nullptr; + } + + _FORCE_INLINE_ RID lightmap_instance_get_lightmap(RID p_lightmap_instance) { + LightmapInstance *li = lightmap_instance_owner.getornull(p_lightmap_instance); + return li->lightmap; + } + _FORCE_INLINE_ Transform lightmap_instance_get_transform(RID p_lightmap_instance) { + LightmapInstance *li = lightmap_instance_owner.getornull(p_lightmap_instance); + return li->transform; + } + + RID gi_probe_instance_create(RID p_base); + void gi_probe_instance_set_transform_to_data(RID p_probe, const Transform &p_xform); + bool gi_probe_needs_update(RID p_probe) const; + void gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<RendererSceneRender::GeometryInstance *> &p_dynamic_objects); + + void gi_probe_set_quality(RS::GIProbeQuality p_quality) { gi_probe_quality = p_quality; } + + _FORCE_INLINE_ uint32_t gi_probe_instance_get_slot(RID p_probe) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); + return gi_probe->slot; + } + _FORCE_INLINE_ RID gi_probe_instance_get_base_probe(RID p_probe) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); + return gi_probe->probe; + } + _FORCE_INLINE_ Transform gi_probe_instance_get_transform_to_cell(RID p_probe) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); + return storage->gi_probe_get_to_cell_xform(gi_probe->probe) * gi_probe->transform.affine_inverse(); + } + + _FORCE_INLINE_ RID gi_probe_instance_get_texture(RID p_probe) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); + return gi_probe->texture; + } + + _FORCE_INLINE_ void gi_probe_instance_set_render_index(RID p_instance, uint32_t p_render_index) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND(!gi_probe); + gi_probe->render_index = p_render_index; + } + + _FORCE_INLINE_ uint32_t gi_probe_instance_get_render_index(RID p_instance) { + GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!gi_probe, 0); + + return gi_probe->render_index; + } + /* + _FORCE_INLINE_ void gi_probe_instance_set_render_pass(RID p_instance, uint32_t p_render_pass) { + GIProbeInstance *g_probe = gi_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND(!g_probe); + g_probe->last_pass = p_render_pass; + } + + _FORCE_INLINE_ uint32_t gi_probe_instance_get_render_pass(RID p_instance) { + GIProbeInstance *g_probe = gi_probe_instance_owner.getornull(p_instance); + ERR_FAIL_COND_V(!g_probe, 0); + + return g_probe->last_pass; + } +*/ + RID render_buffers_create(); + void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_width, int p_height, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding); + void gi_set_use_half_resolution(bool p_enable); + + RID render_buffers_get_ao_texture(RID p_render_buffers); + RID render_buffers_get_back_buffer_texture(RID p_render_buffers); + RID render_buffers_get_gi_probe_buffer(RID p_render_buffers); + RID render_buffers_get_default_gi_probe_buffer(); + RID render_buffers_get_gi_ambient_texture(RID p_render_buffers); + RID render_buffers_get_gi_reflection_texture(RID p_render_buffers); + + uint32_t render_buffers_get_sdfgi_cascade_count(RID p_render_buffers) const; + bool render_buffers_is_sdfgi_enabled(RID p_render_buffers) const; + RID render_buffers_get_sdfgi_irradiance_probes(RID p_render_buffers) const; + Vector3 render_buffers_get_sdfgi_cascade_offset(RID p_render_buffers, uint32_t p_cascade) const; + Vector3i render_buffers_get_sdfgi_cascade_probe_offset(RID p_render_buffers, uint32_t p_cascade) const; + float render_buffers_get_sdfgi_cascade_probe_size(RID p_render_buffers, uint32_t p_cascade) const; + float render_buffers_get_sdfgi_normal_bias(RID p_render_buffers) const; + uint32_t render_buffers_get_sdfgi_cascade_probe_count(RID p_render_buffers) const; + uint32_t render_buffers_get_sdfgi_cascade_size(RID p_render_buffers) const; + bool render_buffers_is_sdfgi_using_occlusion(RID p_render_buffers) const; + float render_buffers_get_sdfgi_energy(RID p_render_buffers) const; + RID render_buffers_get_sdfgi_occlusion_texture(RID p_render_buffers) const; + + bool render_buffers_has_volumetric_fog(RID p_render_buffers) const; + RID render_buffers_get_volumetric_fog_texture(RID p_render_buffers); + RID render_buffers_get_volumetric_fog_sky_uniform_set(RID p_render_buffers); + float render_buffers_get_volumetric_fog_end(RID p_render_buffers); + float render_buffers_get_volumetric_fog_detail_spread(RID p_render_buffers); + + void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr); + + void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + + void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances); + + virtual void set_scene_pass(uint64_t p_pass) { + scene_pass = p_pass; + } + _FORCE_INLINE_ uint64_t get_scene_pass() { + return scene_pass; + } + + virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_limit); + virtual bool screen_space_roughness_limiter_is_active() const; + virtual float screen_space_roughness_limiter_get_amount() const; + virtual float screen_space_roughness_limiter_get_limit() const; + + virtual void sub_surface_scattering_set_quality(RS::SubSurfaceScatteringQuality p_quality); + RS::SubSurfaceScatteringQuality sub_surface_scattering_get_quality() const; + virtual void sub_surface_scattering_set_scale(float p_scale, float p_depth_scale); + + virtual void shadows_quality_set(RS::ShadowQuality p_quality); + virtual void directional_shadow_quality_set(RS::ShadowQuality p_quality); + _FORCE_INLINE_ RS::ShadowQuality shadows_quality_get() const { return shadows_quality; } + _FORCE_INLINE_ RS::ShadowQuality directional_shadow_quality_get() const { return directional_shadow_quality; } + _FORCE_INLINE_ float shadows_quality_radius_get() const { return shadows_quality_radius; } + _FORCE_INLINE_ float directional_shadow_quality_radius_get() const { return directional_shadow_quality_radius; } + + _FORCE_INLINE_ float *directional_penumbra_shadow_kernel_get() { return directional_penumbra_shadow_kernel; } + _FORCE_INLINE_ float *directional_soft_shadow_kernel_get() { return directional_soft_shadow_kernel; } + _FORCE_INLINE_ float *penumbra_shadow_kernel_get() { return penumbra_shadow_kernel; } + _FORCE_INLINE_ float *soft_shadow_kernel_get() { return soft_shadow_kernel; } + + _FORCE_INLINE_ int directional_penumbra_shadow_samples_get() const { return directional_penumbra_shadow_samples; } + _FORCE_INLINE_ int directional_soft_shadow_samples_get() const { return directional_soft_shadow_samples; } + _FORCE_INLINE_ int penumbra_shadow_samples_get() const { return penumbra_shadow_samples; } + _FORCE_INLINE_ int soft_shadow_samples_get() const { return soft_shadow_samples; } + + int get_roughness_layers() const; + bool is_using_radiance_cubemap_array() const; + + virtual TypedArray<Image> bake_render_uv2(RID p_base, const Vector<RID> &p_material_overrides, const Size2i &p_image_size); + + virtual bool free(RID p_rid); + + virtual void update(); + + virtual void set_debug_draw_mode(RS::ViewportDebugDraw p_debug_draw); + _FORCE_INLINE_ RS::ViewportDebugDraw get_debug_draw_mode() const { + return debug_draw; + } + + virtual void set_time(double p_time, double p_step); + + RID get_reflection_probe_buffer(); + RID get_omni_light_buffer(); + RID get_spot_light_buffer(); + RID get_directional_light_buffer(); + RID get_decal_buffer(); + int get_max_directional_lights() const; + + void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir); + + bool is_low_end() const; + + RendererSceneRenderRD(RendererStorageRD *p_storage); + ~RendererSceneRenderRD(); +}; + +#endif // RASTERIZER_SCENE_RD_H diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp new file mode 100644 index 0000000000..2a34049675 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -0,0 +1,9056 @@ +/*************************************************************************/ +/* renderer_storage_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "renderer_storage_rd.h" + +#include "core/config/engine.h" +#include "core/config/project_settings.h" +#include "core/io/resource_loader.h" +#include "renderer_compositor_rd.h" +#include "servers/rendering/shader_language.h" + +bool RendererStorageRD::can_create_resources_async() const { + return true; +} + +Ref<Image> RendererStorageRD::_validate_texture_format(const Ref<Image> &p_image, TextureToRDFormat &r_format) { + Ref<Image> image = p_image->duplicate(); + + switch (p_image->get_format()) { + case Image::FORMAT_L8: { + r_format.format = RD::DATA_FORMAT_R8_UNORM; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; //luminance + case Image::FORMAT_LA8: { + r_format.format = RD::DATA_FORMAT_R8G8_UNORM; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_G; + } break; //luminance-alpha + case Image::FORMAT_R8: { + r_format.format = RD::DATA_FORMAT_R8_UNORM; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_RG8: { + r_format.format = RD::DATA_FORMAT_R8G8_UNORM; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_RGB8: { + //this format is not mandatory for specification, check if supported first + if (false && RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_R8G8B8_UNORM, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT) && RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_R8G8B8_SRGB, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_R8G8B8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8_SRGB; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; + case Image::FORMAT_RGBA8: { + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; + case Image::FORMAT_RGBA4444: { + r_format.format = RD::DATA_FORMAT_B4G4R4A4_UNORM_PACK16; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_B; //needs swizzle + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; + case Image::FORMAT_RGB565: { + r_format.format = RD::DATA_FORMAT_B5G6R5_UNORM_PACK16; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; + case Image::FORMAT_RF: { + r_format.format = RD::DATA_FORMAT_R32_SFLOAT; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; //float + case Image::FORMAT_RGF: { + r_format.format = RD::DATA_FORMAT_R32G32_SFLOAT; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_RGBF: { + //this format is not mandatory for specification, check if supported first + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_R32G32B32_SFLOAT, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + image->convert(Image::FORMAT_RGBAF); + } + + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_RGBAF: { + r_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + + } break; + case Image::FORMAT_RH: { + r_format.format = RD::DATA_FORMAT_R16_SFLOAT; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; //half float + case Image::FORMAT_RGH: { + r_format.format = RD::DATA_FORMAT_R16G16_SFLOAT; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; + case Image::FORMAT_RGBH: { + //this format is not mandatory for specification, check if supported first + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_R16G16B16_SFLOAT, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_R16G16B16_SFLOAT; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + image->convert(Image::FORMAT_RGBAH); + } + + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_RGBAH: { + r_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + + } break; + case Image::FORMAT_RGBE9995: { + r_format.format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; +#ifndef _MSC_VER +#warning TODO need to make a function in Image to swap bits for this +#endif + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_IDENTITY; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_IDENTITY; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_IDENTITY; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_IDENTITY; + } break; + case Image::FORMAT_DXT1: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC1_RGB_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC1_RGB_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_BC1_RGB_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; //s3tc bc1 + case Image::FORMAT_DXT3: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC2_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC2_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_BC2_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + + } break; //bc2 + case Image::FORMAT_DXT5: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC3_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC3_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_BC3_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; //bc3 + case Image::FORMAT_RGTC_R: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC4_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC4_UNORM_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8_UNORM; + image->decompress(); + image->convert(Image::FORMAT_R8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; + case Image::FORMAT_RGTC_RG: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC5_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC5_UNORM_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8_UNORM; + image->decompress(); + image->convert(Image::FORMAT_RG8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; + case Image::FORMAT_BPTC_RGBA: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC7_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC7_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_BC7_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + + } break; //btpc bc7 + case Image::FORMAT_BPTC_RGBF: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC6H_SFLOAT_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC6H_SFLOAT_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + image->decompress(); + image->convert(Image::FORMAT_RGBAH); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; //float bc6h + case Image::FORMAT_BPTC_RGBFU: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC6H_UFLOAT_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC6H_UFLOAT_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + image->decompress(); + image->convert(Image::FORMAT_RGBAH); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; //unsigned float bc6hu + case Image::FORMAT_PVRTC1_2: { + //this is not properly supported by MoltekVK it seems, so best to use ETC2 + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG; + r_format.format_srgb = RD::DATA_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; //pvrtc + case Image::FORMAT_PVRTC1_2A: { + //this is not properly supported by MoltekVK it seems, so best to use ETC2 + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG; + r_format.format_srgb = RD::DATA_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; + case Image::FORMAT_PVRTC1_4: { + //this is not properly supported by MoltekVK it seems, so best to use ETC2 + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG; + r_format.format_srgb = RD::DATA_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_PVRTC1_4A: { + //this is not properly supported by MoltekVK it seems, so best to use ETC2 + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG; + r_format.format_srgb = RD::DATA_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; + case Image::FORMAT_ETC2_R11: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_EAC_R11_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_EAC_R11_UNORM_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8_UNORM; + image->decompress(); + image->convert(Image::FORMAT_R8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; //etc2 + case Image::FORMAT_ETC2_R11S: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_EAC_R11_SNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_EAC_R11_SNORM_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8_SNORM; + image->decompress(); + image->convert(Image::FORMAT_R8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; //signed: {} break; NOT srgb. + case Image::FORMAT_ETC2_RG11: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_EAC_R11G11_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_EAC_R11G11_UNORM_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8_UNORM; + image->decompress(); + image->convert(Image::FORMAT_RG8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_ETC2_RG11S: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_EAC_R11G11_SNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_EAC_R11G11_SNORM_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8_SNORM; + image->decompress(); + image->convert(Image::FORMAT_RG8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_ETC: + case Image::FORMAT_ETC2_RGB8: { + //ETC2 is backwards compatible with ETC1, and all modern platforms support it + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + + } break; + case Image::FORMAT_ETC2_RGBA8: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; + case Image::FORMAT_ETC2_RGB8A1: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; + case Image::FORMAT_ETC2_RA_AS_RG: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_A; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + case Image::FORMAT_DXT5_RA_AS_RG: { + if (RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC3_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT)) { + r_format.format = RD::DATA_FORMAT_BC3_UNORM_BLOCK; + r_format.format_srgb = RD::DATA_FORMAT_BC3_SRGB_BLOCK; + } else { + //not supported, reconvert + r_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + r_format.format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + image->decompress(); + image->convert(Image::FORMAT_RGBA8); + } + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_A; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_ZERO; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } break; + + default: { + } + } + + return image; +} + +RID RendererStorageRD::texture_allocate() { + return texture_owner.allocate_rid(); +} + +void RendererStorageRD::texture_2d_initialize(RID p_texture, const Ref<Image> &p_image) { + ERR_FAIL_COND(p_image.is_null()); + ERR_FAIL_COND(p_image->is_empty()); + + TextureToRDFormat ret_format; + Ref<Image> image = _validate_texture_format(p_image, ret_format); + + Texture texture; + + texture.type = Texture::TYPE_2D; + + texture.width = p_image->get_width(); + texture.height = p_image->get_height(); + texture.layers = 1; + texture.mipmaps = p_image->get_mipmap_count() + 1; + texture.depth = 1; + texture.format = p_image->get_format(); + texture.validated_format = image->get_format(); + + texture.rd_type = RD::TEXTURE_TYPE_2D; + texture.rd_format = ret_format.format; + texture.rd_format_srgb = ret_format.format_srgb; + + RD::TextureFormat rd_format; + RD::TextureView rd_view; + { //attempt register + rd_format.format = texture.rd_format; + rd_format.width = texture.width; + rd_format.height = texture.height; + rd_format.depth = 1; + rd_format.array_layers = 1; + rd_format.mipmaps = texture.mipmaps; + rd_format.texture_type = texture.rd_type; + rd_format.samples = RD::TEXTURE_SAMPLES_1; + rd_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_format.shareable_formats.push_back(texture.rd_format); + rd_format.shareable_formats.push_back(texture.rd_format_srgb); + } + } + { + rd_view.swizzle_r = ret_format.swizzle_r; + rd_view.swizzle_g = ret_format.swizzle_g; + rd_view.swizzle_b = ret_format.swizzle_b; + rd_view.swizzle_a = ret_format.swizzle_a; + } + Vector<uint8_t> data = image->get_data(); //use image data + Vector<Vector<uint8_t>> data_slices; + data_slices.push_back(data); + texture.rd_texture = RD::get_singleton()->texture_create(rd_format, rd_view, data_slices); + ERR_FAIL_COND(texture.rd_texture.is_null()); + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_view.format_override = texture.rd_format_srgb; + texture.rd_texture_srgb = RD::get_singleton()->texture_create_shared(rd_view, texture.rd_texture); + if (texture.rd_texture_srgb.is_null()) { + RD::get_singleton()->free(texture.rd_texture); + ERR_FAIL_COND(texture.rd_texture_srgb.is_null()); + } + } + + //used for 2D, overridable + texture.width_2d = texture.width; + texture.height_2d = texture.height; + texture.is_render_target = false; + texture.rd_view = rd_view; + texture.is_proxy = false; + + texture_owner.initialize_rid(p_texture, texture); +} + +void RendererStorageRD::texture_2d_layered_initialize(RID p_texture, const Vector<Ref<Image>> &p_layers, RS::TextureLayeredType p_layered_type) { + ERR_FAIL_COND(p_layers.size() == 0); + + ERR_FAIL_COND(p_layered_type == RS::TEXTURE_LAYERED_CUBEMAP && p_layers.size() != 6); + ERR_FAIL_COND(p_layered_type == RS::TEXTURE_LAYERED_CUBEMAP_ARRAY && (p_layers.size() < 6 || (p_layers.size() % 6) != 0)); + + TextureToRDFormat ret_format; + Vector<Ref<Image>> images; + { + int valid_width = 0; + int valid_height = 0; + bool valid_mipmaps = false; + Image::Format valid_format = Image::FORMAT_MAX; + + for (int i = 0; i < p_layers.size(); i++) { + ERR_FAIL_COND(p_layers[i]->is_empty()); + + if (i == 0) { + valid_width = p_layers[i]->get_width(); + valid_height = p_layers[i]->get_height(); + valid_format = p_layers[i]->get_format(); + valid_mipmaps = p_layers[i]->has_mipmaps(); + } else { + ERR_FAIL_COND(p_layers[i]->get_width() != valid_width); + ERR_FAIL_COND(p_layers[i]->get_height() != valid_height); + ERR_FAIL_COND(p_layers[i]->get_format() != valid_format); + ERR_FAIL_COND(p_layers[i]->has_mipmaps() != valid_mipmaps); + } + + images.push_back(_validate_texture_format(p_layers[i], ret_format)); + } + } + + Texture texture; + + texture.type = Texture::TYPE_LAYERED; + texture.layered_type = p_layered_type; + + texture.width = p_layers[0]->get_width(); + texture.height = p_layers[0]->get_height(); + texture.layers = p_layers.size(); + texture.mipmaps = p_layers[0]->get_mipmap_count() + 1; + texture.depth = 1; + texture.format = p_layers[0]->get_format(); + texture.validated_format = images[0]->get_format(); + + switch (p_layered_type) { + case RS::TEXTURE_LAYERED_2D_ARRAY: { + texture.rd_type = RD::TEXTURE_TYPE_2D_ARRAY; + } break; + case RS::TEXTURE_LAYERED_CUBEMAP: { + texture.rd_type = RD::TEXTURE_TYPE_CUBE; + } break; + case RS::TEXTURE_LAYERED_CUBEMAP_ARRAY: { + texture.rd_type = RD::TEXTURE_TYPE_CUBE_ARRAY; + } break; + } + + texture.rd_format = ret_format.format; + texture.rd_format_srgb = ret_format.format_srgb; + + RD::TextureFormat rd_format; + RD::TextureView rd_view; + { //attempt register + rd_format.format = texture.rd_format; + rd_format.width = texture.width; + rd_format.height = texture.height; + rd_format.depth = 1; + rd_format.array_layers = texture.layers; + rd_format.mipmaps = texture.mipmaps; + rd_format.texture_type = texture.rd_type; + rd_format.samples = RD::TEXTURE_SAMPLES_1; + rd_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_format.shareable_formats.push_back(texture.rd_format); + rd_format.shareable_formats.push_back(texture.rd_format_srgb); + } + } + { + rd_view.swizzle_r = ret_format.swizzle_r; + rd_view.swizzle_g = ret_format.swizzle_g; + rd_view.swizzle_b = ret_format.swizzle_b; + rd_view.swizzle_a = ret_format.swizzle_a; + } + Vector<Vector<uint8_t>> data_slices; + for (int i = 0; i < images.size(); i++) { + Vector<uint8_t> data = images[i]->get_data(); //use image data + data_slices.push_back(data); + } + texture.rd_texture = RD::get_singleton()->texture_create(rd_format, rd_view, data_slices); + ERR_FAIL_COND(texture.rd_texture.is_null()); + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_view.format_override = texture.rd_format_srgb; + texture.rd_texture_srgb = RD::get_singleton()->texture_create_shared(rd_view, texture.rd_texture); + if (texture.rd_texture_srgb.is_null()) { + RD::get_singleton()->free(texture.rd_texture); + ERR_FAIL_COND(texture.rd_texture_srgb.is_null()); + } + } + + //used for 2D, overridable + texture.width_2d = texture.width; + texture.height_2d = texture.height; + texture.is_render_target = false; + texture.rd_view = rd_view; + texture.is_proxy = false; + + texture_owner.initialize_rid(p_texture, texture); +} + +void RendererStorageRD::texture_3d_initialize(RID p_texture, Image::Format p_format, int p_width, int p_height, int p_depth, bool p_mipmaps, const Vector<Ref<Image>> &p_data) { + ERR_FAIL_COND(p_data.size() == 0); + Image::Image3DValidateError verr = Image::validate_3d_image(p_format, p_width, p_height, p_depth, p_mipmaps, p_data); + if (verr != Image::VALIDATE_3D_OK) { + ERR_FAIL_MSG(Image::get_3d_image_validation_error_text(verr)); + } + + TextureToRDFormat ret_format; + Image::Format validated_format = Image::FORMAT_MAX; + Vector<uint8_t> all_data; + uint32_t mipmap_count = 0; + Vector<Texture::BufferSlice3D> slices; + { + Vector<Ref<Image>> images; + uint32_t all_data_size = 0; + images.resize(p_data.size()); + for (int i = 0; i < p_data.size(); i++) { + TextureToRDFormat f; + images.write[i] = _validate_texture_format(p_data[i], f); + if (i == 0) { + ret_format = f; + validated_format = images[0]->get_format(); + } + + all_data_size += images[i]->get_data().size(); + } + + all_data.resize(all_data_size); //consolidate all data here + uint32_t offset = 0; + Size2i prev_size; + for (int i = 0; i < p_data.size(); i++) { + uint32_t s = images[i]->get_data().size(); + + copymem(&all_data.write[offset], images[i]->get_data().ptr(), s); + { + Texture::BufferSlice3D slice; + slice.size.width = images[i]->get_width(); + slice.size.height = images[i]->get_height(); + slice.offset = offset; + slice.buffer_size = s; + slices.push_back(slice); + } + offset += s; + + Size2i img_size(images[i]->get_width(), images[i]->get_height()); + if (img_size != prev_size) { + mipmap_count++; + } + prev_size = img_size; + } + } + + Texture texture; + + texture.type = Texture::TYPE_3D; + texture.width = p_width; + texture.height = p_height; + texture.depth = p_depth; + texture.mipmaps = mipmap_count; + texture.format = p_data[0]->get_format(); + texture.validated_format = validated_format; + + texture.buffer_size_3d = all_data.size(); + texture.buffer_slices_3d = slices; + + texture.rd_type = RD::TEXTURE_TYPE_3D; + texture.rd_format = ret_format.format; + texture.rd_format_srgb = ret_format.format_srgb; + + RD::TextureFormat rd_format; + RD::TextureView rd_view; + { //attempt register + rd_format.format = texture.rd_format; + rd_format.width = texture.width; + rd_format.height = texture.height; + rd_format.depth = texture.depth; + rd_format.array_layers = 1; + rd_format.mipmaps = texture.mipmaps; + rd_format.texture_type = texture.rd_type; + rd_format.samples = RD::TEXTURE_SAMPLES_1; + rd_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_format.shareable_formats.push_back(texture.rd_format); + rd_format.shareable_formats.push_back(texture.rd_format_srgb); + } + } + { + rd_view.swizzle_r = ret_format.swizzle_r; + rd_view.swizzle_g = ret_format.swizzle_g; + rd_view.swizzle_b = ret_format.swizzle_b; + rd_view.swizzle_a = ret_format.swizzle_a; + } + Vector<Vector<uint8_t>> data_slices; + data_slices.push_back(all_data); //one slice + + texture.rd_texture = RD::get_singleton()->texture_create(rd_format, rd_view, data_slices); + ERR_FAIL_COND(texture.rd_texture.is_null()); + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_view.format_override = texture.rd_format_srgb; + texture.rd_texture_srgb = RD::get_singleton()->texture_create_shared(rd_view, texture.rd_texture); + if (texture.rd_texture_srgb.is_null()) { + RD::get_singleton()->free(texture.rd_texture); + ERR_FAIL_COND(texture.rd_texture_srgb.is_null()); + } + } + + //used for 2D, overridable + texture.width_2d = texture.width; + texture.height_2d = texture.height; + texture.is_render_target = false; + texture.rd_view = rd_view; + texture.is_proxy = false; + + texture_owner.initialize_rid(p_texture, texture); +} + +void RendererStorageRD::texture_proxy_initialize(RID p_texture, RID p_base) { + Texture *tex = texture_owner.getornull(p_base); + ERR_FAIL_COND(!tex); + Texture proxy_tex = *tex; + + proxy_tex.rd_view.format_override = tex->rd_format; + proxy_tex.rd_texture = RD::get_singleton()->texture_create_shared(proxy_tex.rd_view, tex->rd_texture); + if (proxy_tex.rd_texture_srgb.is_valid()) { + proxy_tex.rd_view.format_override = tex->rd_format_srgb; + proxy_tex.rd_texture_srgb = RD::get_singleton()->texture_create_shared(proxy_tex.rd_view, tex->rd_texture); + } + proxy_tex.proxy_to = p_base; + proxy_tex.is_render_target = false; + proxy_tex.is_proxy = true; + proxy_tex.proxies.clear(); + + texture_owner.initialize_rid(p_texture, proxy_tex); + + tex->proxies.push_back(p_texture); +} + +void RendererStorageRD::_texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer, bool p_immediate) { + ERR_FAIL_COND(p_image.is_null() || p_image->is_empty()); + + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + ERR_FAIL_COND(tex->is_render_target); + ERR_FAIL_COND(p_image->get_width() != tex->width || p_image->get_height() != tex->height); + ERR_FAIL_COND(p_image->get_format() != tex->format); + + if (tex->type == Texture::TYPE_LAYERED) { + ERR_FAIL_INDEX(p_layer, tex->layers); + } + +#ifdef TOOLS_ENABLED + tex->image_cache_2d.unref(); +#endif + TextureToRDFormat f; + Ref<Image> validated = _validate_texture_format(p_image, f); + + RD::get_singleton()->texture_update(tex->rd_texture, p_layer, validated->get_data()); +} + +void RendererStorageRD::texture_2d_update_immediate(RID p_texture, const Ref<Image> &p_image, int p_layer) { + _texture_2d_update(p_texture, p_image, p_layer, true); +} + +void RendererStorageRD::texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer) { + _texture_2d_update(p_texture, p_image, p_layer, false); +} + +void RendererStorageRD::texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + ERR_FAIL_COND(tex->type != Texture::TYPE_3D); + Image::Image3DValidateError verr = Image::validate_3d_image(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps > 1, p_data); + if (verr != Image::VALIDATE_3D_OK) { + ERR_FAIL_MSG(Image::get_3d_image_validation_error_text(verr)); + } + + Vector<uint8_t> all_data; + { + Vector<Ref<Image>> images; + uint32_t all_data_size = 0; + images.resize(p_data.size()); + for (int i = 0; i < p_data.size(); i++) { + Ref<Image> image = p_data[i]; + if (image->get_format() != tex->validated_format) { + image = image->duplicate(); + image->convert(tex->validated_format); + } + all_data_size += images[i]->get_data().size(); + images.push_back(image); + } + + all_data.resize(all_data_size); //consolidate all data here + uint32_t offset = 0; + + for (int i = 0; i < p_data.size(); i++) { + uint32_t s = images[i]->get_data().size(); + copymem(&all_data.write[offset], images[i]->get_data().ptr(), s); + offset += s; + } + } + + RD::get_singleton()->texture_update(tex->rd_texture, 0, all_data); +} + +void RendererStorageRD::texture_proxy_update(RID p_texture, RID p_proxy_to) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + ERR_FAIL_COND(!tex->is_proxy); + Texture *proxy_to = texture_owner.getornull(p_proxy_to); + ERR_FAIL_COND(!proxy_to); + ERR_FAIL_COND(proxy_to->is_proxy); + + if (tex->proxy_to.is_valid()) { + //unlink proxy + if (RD::get_singleton()->texture_is_valid(tex->rd_texture)) { + RD::get_singleton()->free(tex->rd_texture); + tex->rd_texture = RID(); + } + if (RD::get_singleton()->texture_is_valid(tex->rd_texture_srgb)) { + RD::get_singleton()->free(tex->rd_texture_srgb); + tex->rd_texture_srgb = RID(); + } + Texture *prev_tex = texture_owner.getornull(tex->proxy_to); + ERR_FAIL_COND(!prev_tex); + prev_tex->proxies.erase(p_texture); + } + + *tex = *proxy_to; + + tex->proxy_to = p_proxy_to; + tex->is_render_target = false; + tex->is_proxy = true; + tex->proxies.clear(); + proxy_to->proxies.push_back(p_texture); + + tex->rd_view.format_override = tex->rd_format; + tex->rd_texture = RD::get_singleton()->texture_create_shared(tex->rd_view, proxy_to->rd_texture); + if (tex->rd_texture_srgb.is_valid()) { + tex->rd_view.format_override = tex->rd_format_srgb; + tex->rd_texture_srgb = RD::get_singleton()->texture_create_shared(tex->rd_view, proxy_to->rd_texture); + } +} + +//these two APIs can be used together or in combination with the others. +void RendererStorageRD::texture_2d_placeholder_initialize(RID p_texture) { + //this could be better optimized to reuse an existing image , done this way + //for now to get it working + Ref<Image> image; + image.instance(); + image->create(4, 4, false, Image::FORMAT_RGBA8); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + image->set_pixel(i, j, Color(1, 0, 1, 1)); + } + } + + texture_2d_initialize(p_texture, image); +} + +void RendererStorageRD::texture_2d_layered_placeholder_initialize(RID p_texture, RS::TextureLayeredType p_layered_type) { + //this could be better optimized to reuse an existing image , done this way + //for now to get it working + Ref<Image> image; + image.instance(); + image->create(4, 4, false, Image::FORMAT_RGBA8); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + image->set_pixel(i, j, Color(1, 0, 1, 1)); + } + } + + Vector<Ref<Image>> images; + if (p_layered_type == RS::TEXTURE_LAYERED_2D_ARRAY) { + images.push_back(image); + } else { + //cube + for (int i = 0; i < 6; i++) { + images.push_back(image); + } + } + + texture_2d_layered_initialize(p_texture, images, p_layered_type); +} + +void RendererStorageRD::texture_3d_placeholder_initialize(RID p_texture) { + //this could be better optimized to reuse an existing image , done this way + //for now to get it working + Ref<Image> image; + image.instance(); + image->create(4, 4, false, Image::FORMAT_RGBA8); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + image->set_pixel(i, j, Color(1, 0, 1, 1)); + } + } + + Vector<Ref<Image>> images; + //cube + for (int i = 0; i < 4; i++) { + images.push_back(image); + } + + texture_3d_initialize(p_texture, Image::FORMAT_RGBA8, 4, 4, 4, false, images); +} + +Ref<Image> RendererStorageRD::texture_2d_get(RID p_texture) const { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND_V(!tex, Ref<Image>()); + +#ifdef TOOLS_ENABLED + if (tex->image_cache_2d.is_valid()) { + return tex->image_cache_2d; + } +#endif + Vector<uint8_t> data = RD::get_singleton()->texture_get_data(tex->rd_texture, 0); + ERR_FAIL_COND_V(data.size() == 0, Ref<Image>()); + Ref<Image> image; + image.instance(); + image->create(tex->width, tex->height, tex->mipmaps > 1, tex->validated_format, data); + ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); + if (tex->format != tex->validated_format) { + image->convert(tex->format); + } + +#ifdef TOOLS_ENABLED + if (Engine::get_singleton()->is_editor_hint()) { + tex->image_cache_2d = image; + } +#endif + + return image; +} + +Ref<Image> RendererStorageRD::texture_2d_layer_get(RID p_texture, int p_layer) const { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND_V(!tex, Ref<Image>()); + + Vector<uint8_t> data = RD::get_singleton()->texture_get_data(tex->rd_texture, p_layer); + ERR_FAIL_COND_V(data.size() == 0, Ref<Image>()); + Ref<Image> image; + image.instance(); + image->create(tex->width, tex->height, tex->mipmaps > 1, tex->validated_format, data); + ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); + if (tex->format != tex->validated_format) { + image->convert(tex->format); + } + + return image; +} + +Vector<Ref<Image>> RendererStorageRD::texture_3d_get(RID p_texture) const { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND_V(!tex, Vector<Ref<Image>>()); + ERR_FAIL_COND_V(tex->type != Texture::TYPE_3D, Vector<Ref<Image>>()); + + Vector<uint8_t> all_data = RD::get_singleton()->texture_get_data(tex->rd_texture, 0); + + ERR_FAIL_COND_V(all_data.size() != (int)tex->buffer_size_3d, Vector<Ref<Image>>()); + + Vector<Ref<Image>> ret; + + for (int i = 0; i < tex->buffer_slices_3d.size(); i++) { + const Texture::BufferSlice3D &bs = tex->buffer_slices_3d[i]; + ERR_FAIL_COND_V(bs.offset >= (uint32_t)all_data.size(), Vector<Ref<Image>>()); + ERR_FAIL_COND_V(bs.offset + bs.buffer_size > (uint32_t)all_data.size(), Vector<Ref<Image>>()); + Vector<uint8_t> sub_region = all_data.subarray(bs.offset, bs.offset + bs.buffer_size - 1); + + Ref<Image> img; + img.instance(); + img->create(bs.size.width, bs.size.height, false, tex->validated_format, sub_region); + ERR_FAIL_COND_V(img->is_empty(), Vector<Ref<Image>>()); + if (tex->format != tex->validated_format) { + img->convert(tex->format); + } + + ret.push_back(img); + } + + return ret; +} + +void RendererStorageRD::texture_replace(RID p_texture, RID p_by_texture) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + ERR_FAIL_COND(tex->proxy_to.is_valid()); //can't replace proxy + Texture *by_tex = texture_owner.getornull(p_by_texture); + ERR_FAIL_COND(!by_tex); + ERR_FAIL_COND(by_tex->proxy_to.is_valid()); //can't replace proxy + + if (tex == by_tex) { + return; + } + + if (tex->rd_texture_srgb.is_valid()) { + RD::get_singleton()->free(tex->rd_texture_srgb); + } + RD::get_singleton()->free(tex->rd_texture); + + if (tex->canvas_texture) { + memdelete(tex->canvas_texture); + tex->canvas_texture = nullptr; + } + + Vector<RID> proxies_to_update = tex->proxies; + Vector<RID> proxies_to_redirect = by_tex->proxies; + + *tex = *by_tex; + + tex->proxies = proxies_to_update; //restore proxies, so they can be updated + + if (tex->canvas_texture) { + tex->canvas_texture->diffuse = p_texture; //update + } + + for (int i = 0; i < proxies_to_update.size(); i++) { + texture_proxy_update(proxies_to_update[i], p_texture); + } + for (int i = 0; i < proxies_to_redirect.size(); i++) { + texture_proxy_update(proxies_to_redirect[i], p_texture); + } + //delete last, so proxies can be updated + texture_owner.free(p_by_texture); + + if (decal_atlas.textures.has(p_texture)) { + //belongs to decal atlas.. + + decal_atlas.dirty = true; //mark it dirty since it was most likely modified + } +} + +void RendererStorageRD::texture_set_size_override(RID p_texture, int p_width, int p_height) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + ERR_FAIL_COND(tex->type != Texture::TYPE_2D); + tex->width_2d = p_width; + tex->height_2d = p_height; +} + +void RendererStorageRD::texture_set_path(RID p_texture, const String &p_path) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + tex->path = p_path; +} + +String RendererStorageRD::texture_get_path(RID p_texture) const { + return String(); +} + +void RendererStorageRD::texture_set_detect_3d_callback(RID p_texture, RS::TextureDetectCallback p_callback, void *p_userdata) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + tex->detect_3d_callback_ud = p_userdata; + tex->detect_3d_callback = p_callback; +} + +void RendererStorageRD::texture_set_detect_normal_callback(RID p_texture, RS::TextureDetectCallback p_callback, void *p_userdata) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + tex->detect_normal_callback_ud = p_userdata; + tex->detect_normal_callback = p_callback; +} + +void RendererStorageRD::texture_set_detect_roughness_callback(RID p_texture, RS::TextureDetectRoughnessCallback p_callback, void *p_userdata) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + tex->detect_roughness_callback_ud = p_userdata; + tex->detect_roughness_callback = p_callback; +} + +void RendererStorageRD::texture_debug_usage(List<RS::TextureInfo> *r_info) { +} + +void RendererStorageRD::texture_set_proxy(RID p_proxy, RID p_base) { +} + +void RendererStorageRD::texture_set_force_redraw_if_visible(RID p_texture, bool p_enable) { +} + +Size2 RendererStorageRD::texture_size_with_proxy(RID p_proxy) { + return texture_2d_get_size(p_proxy); +} + +/* CANVAS TEXTURE */ + +void RendererStorageRD::CanvasTexture::clear_sets() { + if (cleared_cache) { + return; + } + for (int i = 1; i < RS::CANVAS_ITEM_TEXTURE_FILTER_MAX; i++) { + for (int j = 1; j < RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX; j++) { + if (RD::get_singleton()->uniform_set_is_valid(uniform_sets[i][j])) { + RD::get_singleton()->free(uniform_sets[i][j]); + uniform_sets[i][j] = RID(); + } + } + } + cleared_cache = true; +} + +RendererStorageRD::CanvasTexture::~CanvasTexture() { + clear_sets(); +} + +RID RendererStorageRD::canvas_texture_allocate() { + return canvas_texture_owner.allocate_rid(); +} +void RendererStorageRD::canvas_texture_initialize(RID p_rid) { + canvas_texture_owner.initialize_rid(p_rid, memnew(CanvasTexture)); +} + +void RendererStorageRD::canvas_texture_set_channel(RID p_canvas_texture, RS::CanvasTextureChannel p_channel, RID p_texture) { + CanvasTexture *ct = canvas_texture_owner.getornull(p_canvas_texture); + switch (p_channel) { + case RS::CANVAS_TEXTURE_CHANNEL_DIFFUSE: { + ct->diffuse = p_texture; + } break; + case RS::CANVAS_TEXTURE_CHANNEL_NORMAL: { + ct->normal_map = p_texture; + } break; + case RS::CANVAS_TEXTURE_CHANNEL_SPECULAR: { + ct->specular = p_texture; + } break; + } + + ct->clear_sets(); +} + +void RendererStorageRD::canvas_texture_set_shading_parameters(RID p_canvas_texture, const Color &p_specular_color, float p_shininess) { + CanvasTexture *ct = canvas_texture_owner.getornull(p_canvas_texture); + ct->specular_color.r = p_specular_color.r; + ct->specular_color.g = p_specular_color.g; + ct->specular_color.b = p_specular_color.b; + ct->specular_color.a = p_shininess; + ct->clear_sets(); +} + +void RendererStorageRD::canvas_texture_set_texture_filter(RID p_canvas_texture, RS::CanvasItemTextureFilter p_filter) { + CanvasTexture *ct = canvas_texture_owner.getornull(p_canvas_texture); + ct->texture_filter = p_filter; + ct->clear_sets(); +} + +void RendererStorageRD::canvas_texture_set_texture_repeat(RID p_canvas_texture, RS::CanvasItemTextureRepeat p_repeat) { + CanvasTexture *ct = canvas_texture_owner.getornull(p_canvas_texture); + ct->texture_repeat = p_repeat; + ct->clear_sets(); +} + +bool RendererStorageRD::canvas_texture_get_uniform_set(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, RID p_base_shader, int p_base_set, RID &r_uniform_set, Size2i &r_size, Color &r_specular_shininess, bool &r_use_normal, bool &r_use_specular) { + CanvasTexture *ct = nullptr; + + Texture *t = texture_owner.getornull(p_texture); + + if (t) { + //regular texture + if (!t->canvas_texture) { + t->canvas_texture = memnew(CanvasTexture); + t->canvas_texture->diffuse = p_texture; + } + + ct = t->canvas_texture; + } else { + ct = canvas_texture_owner.getornull(p_texture); + } + + if (!ct) { + return false; //invalid texture RID + } + + RS::CanvasItemTextureFilter filter = ct->texture_filter != RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? ct->texture_filter : p_base_filter; + ERR_FAIL_COND_V(filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT, false); + + RS::CanvasItemTextureRepeat repeat = ct->texture_repeat != RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT ? ct->texture_repeat : p_base_repeat; + ERR_FAIL_COND_V(repeat == RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT, false); + + RID uniform_set = ct->uniform_sets[filter][repeat]; + if (!RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + //create and update + Vector<RD::Uniform> uniforms; + { //diffuse + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + + t = texture_owner.getornull(ct->diffuse); + if (!t) { + u.ids.push_back(texture_rd_get_default(DEFAULT_RD_TEXTURE_WHITE)); + ct->size_cache = Size2i(1, 1); + } else { + u.ids.push_back(t->rd_texture); + ct->size_cache = Size2i(t->width_2d, t->height_2d); + } + uniforms.push_back(u); + } + { //normal + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1; + + t = texture_owner.getornull(ct->normal_map); + if (!t) { + u.ids.push_back(texture_rd_get_default(DEFAULT_RD_TEXTURE_NORMAL)); + ct->use_normal_cache = false; + } else { + u.ids.push_back(t->rd_texture); + ct->use_normal_cache = true; + } + uniforms.push_back(u); + } + { //specular + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 2; + + t = texture_owner.getornull(ct->specular); + if (!t) { + u.ids.push_back(texture_rd_get_default(DEFAULT_RD_TEXTURE_WHITE)); + ct->use_specular_cache = false; + } else { + u.ids.push_back(t->rd_texture); + ct->use_specular_cache = true; + } + uniforms.push_back(u); + } + { //sampler + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 3; + u.ids.push_back(sampler_rd_get_default(filter, repeat)); + uniforms.push_back(u); + } + + uniform_set = RD::get_singleton()->uniform_set_create(uniforms, p_base_shader, p_base_set); + ct->uniform_sets[filter][repeat] = uniform_set; + ct->cleared_cache = false; + } + + r_uniform_set = uniform_set; + r_size = ct->size_cache; + r_specular_shininess = ct->specular_color; + r_use_normal = ct->use_normal_cache; + r_use_specular = ct->use_specular_cache; + + return true; +} + +/* SHADER API */ + +RID RendererStorageRD::shader_allocate() { + return shader_owner.allocate_rid(); +} +void RendererStorageRD::shader_initialize(RID p_rid) { + Shader shader; + shader.data = nullptr; + shader.type = SHADER_TYPE_MAX; + + shader_owner.initialize_rid(p_rid, shader); +} + +void RendererStorageRD::shader_set_code(RID p_shader, const String &p_code) { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND(!shader); + + shader->code = p_code; + String mode_string = ShaderLanguage::get_shader_type(p_code); + + ShaderType new_type; + if (mode_string == "canvas_item") { + new_type = SHADER_TYPE_2D; + } else if (mode_string == "particles") { + new_type = SHADER_TYPE_PARTICLES; + } else if (mode_string == "spatial") { + new_type = SHADER_TYPE_3D; + } else if (mode_string == "sky") { + new_type = SHADER_TYPE_SKY; + } else { + new_type = SHADER_TYPE_MAX; + } + + if (new_type != shader->type) { + if (shader->data) { + memdelete(shader->data); + shader->data = nullptr; + } + + for (Set<Material *>::Element *E = shader->owners.front(); E; E = E->next()) { + Material *material = E->get(); + material->shader_type = new_type; + if (material->data) { + memdelete(material->data); + material->data = nullptr; + } + } + + shader->type = new_type; + + if (new_type < SHADER_TYPE_MAX && shader_data_request_func[new_type]) { + shader->data = shader_data_request_func[new_type](); + } else { + shader->type = SHADER_TYPE_MAX; //invalid + } + + for (Set<Material *>::Element *E = shader->owners.front(); E; E = E->next()) { + Material *material = E->get(); + if (shader->data) { + material->data = material_data_request_func[new_type](shader->data); + material->data->self = material->self; + material->data->set_next_pass(material->next_pass); + material->data->set_render_priority(material->priority); + } + material->shader_type = new_type; + } + + for (Map<StringName, RID>::Element *E = shader->default_texture_parameter.front(); E; E = E->next()) { + shader->data->set_default_texture_param(E->key(), E->get()); + } + } + + if (shader->data) { + shader->data->set_code(p_code); + } + + for (Set<Material *>::Element *E = shader->owners.front(); E; E = E->next()) { + Material *material = E->get(); + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); + _material_queue_update(material, true, true); + } +} + +String RendererStorageRD::shader_get_code(RID p_shader) const { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND_V(!shader, String()); + return shader->code; +} + +void RendererStorageRD::shader_get_param_list(RID p_shader, List<PropertyInfo> *p_param_list) const { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND(!shader); + if (shader->data) { + return shader->data->get_param_list(p_param_list); + } +} + +void RendererStorageRD::shader_set_default_texture_param(RID p_shader, const StringName &p_name, RID p_texture) { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND(!shader); + + if (p_texture.is_valid() && texture_owner.owns(p_texture)) { + shader->default_texture_parameter[p_name] = p_texture; + } else { + shader->default_texture_parameter.erase(p_name); + } + if (shader->data) { + shader->data->set_default_texture_param(p_name, p_texture); + } + for (Set<Material *>::Element *E = shader->owners.front(); E; E = E->next()) { + Material *material = E->get(); + _material_queue_update(material, false, true); + } +} + +RID RendererStorageRD::shader_get_default_texture_param(RID p_shader, const StringName &p_name) const { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND_V(!shader, RID()); + if (shader->default_texture_parameter.has(p_name)) { + return shader->default_texture_parameter[p_name]; + } + + return RID(); +} + +Variant RendererStorageRD::shader_get_param_default(RID p_shader, const StringName &p_param) const { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND_V(!shader, Variant()); + if (shader->data) { + return shader->data->get_default_parameter(p_param); + } + return Variant(); +} + +void RendererStorageRD::shader_set_data_request_function(ShaderType p_shader_type, ShaderDataRequestFunction p_function) { + ERR_FAIL_INDEX(p_shader_type, SHADER_TYPE_MAX); + shader_data_request_func[p_shader_type] = p_function; +} + +RS::ShaderNativeSourceCode RendererStorageRD::shader_get_native_source_code(RID p_shader) const { + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND_V(!shader, RS::ShaderNativeSourceCode()); + if (shader->data) { + return shader->data->get_native_source_code(); + } + return RS::ShaderNativeSourceCode(); +} + +/* COMMON MATERIAL API */ + +RID RendererStorageRD::material_allocate() { + return material_owner.allocate_rid(); +} +void RendererStorageRD::material_initialize(RID p_rid) { + Material material; + material.data = nullptr; + material.shader = nullptr; + material.shader_type = SHADER_TYPE_MAX; + material.update_next = nullptr; + material.update_requested = false; + material.uniform_dirty = false; + material.texture_dirty = false; + material.priority = 0; + material.self = p_rid; + material_owner.initialize_rid(p_rid, material); +} + +void RendererStorageRD::_material_queue_update(Material *material, bool p_uniform, bool p_texture) { + if (material->update_requested) { + return; + } + + material->update_next = material_update_list; + material_update_list = material; + material->update_requested = true; + material->uniform_dirty = material->uniform_dirty || p_uniform; + material->texture_dirty = material->texture_dirty || p_texture; +} + +void RendererStorageRD::material_set_shader(RID p_material, RID p_shader) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND(!material); + + if (material->data) { + memdelete(material->data); + material->data = nullptr; + } + + if (material->shader) { + material->shader->owners.erase(material); + material->shader = nullptr; + material->shader_type = SHADER_TYPE_MAX; + } + + if (p_shader.is_null()) { + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); + material->shader_id = 0; + return; + } + + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND(!shader); + material->shader = shader; + material->shader_type = shader->type; + material->shader_id = p_shader.get_local_index(); + shader->owners.insert(material); + + if (shader->type == SHADER_TYPE_MAX) { + return; + } + + ERR_FAIL_COND(shader->data == nullptr); + + material->data = material_data_request_func[shader->type](shader->data); + material->data->self = p_material; + material->data->set_next_pass(material->next_pass); + material->data->set_render_priority(material->priority); + //updating happens later + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); + _material_queue_update(material, true, true); +} + +void RendererStorageRD::material_set_param(RID p_material, const StringName &p_param, const Variant &p_value) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND(!material); + + if (p_value.get_type() == Variant::NIL) { + material->params.erase(p_param); + } else { + material->params[p_param] = p_value; + } + + if (material->shader && material->shader->data) { //shader is valid + bool is_texture = material->shader->data->is_param_texture(p_param); + _material_queue_update(material, !is_texture, is_texture); + } else { + _material_queue_update(material, true, true); + } +} + +Variant RendererStorageRD::material_get_param(RID p_material, const StringName &p_param) const { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND_V(!material, Variant()); + if (material->params.has(p_param)) { + return material->params[p_param]; + } else { + return Variant(); + } +} + +void RendererStorageRD::material_set_next_pass(RID p_material, RID p_next_material) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND(!material); + + if (material->next_pass == p_next_material) { + return; + } + + material->next_pass = p_next_material; + if (material->data) { + material->data->set_next_pass(p_next_material); + } + + material->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); +} + +void RendererStorageRD::material_set_render_priority(RID p_material, int priority) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND(!material); + material->priority = priority; + if (material->data) { + material->data->set_render_priority(priority); + } +} + +bool RendererStorageRD::material_is_animated(RID p_material) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND_V(!material, false); + if (material->shader && material->shader->data) { + if (material->shader->data->is_animated()) { + return true; + } else if (material->next_pass.is_valid()) { + return material_is_animated(material->next_pass); + } + } + return false; //by default nothing is animated +} + +bool RendererStorageRD::material_casts_shadows(RID p_material) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND_V(!material, true); + if (material->shader && material->shader->data) { + if (material->shader->data->casts_shadows()) { + return true; + } else if (material->next_pass.is_valid()) { + return material_casts_shadows(material->next_pass); + } + } + return true; //by default everything casts shadows +} + +void RendererStorageRD::material_get_instance_shader_parameters(RID p_material, List<InstanceShaderParam> *r_parameters) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND(!material); + if (material->shader && material->shader->data) { + material->shader->data->get_instance_param_list(r_parameters); + + if (material->next_pass.is_valid()) { + material_get_instance_shader_parameters(material->next_pass, r_parameters); + } + } +} + +void RendererStorageRD::material_update_dependency(RID p_material, DependencyTracker *p_instance) { + Material *material = material_owner.getornull(p_material); + ERR_FAIL_COND(!material); + p_instance->update_dependency(&material->dependency); + if (material->next_pass.is_valid()) { + material_update_dependency(material->next_pass, p_instance); + } +} + +void RendererStorageRD::material_set_data_request_function(ShaderType p_shader_type, MaterialDataRequestFunction p_function) { + ERR_FAIL_INDEX(p_shader_type, SHADER_TYPE_MAX); + material_data_request_func[p_shader_type] = p_function; +} + +_FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataType type, const Variant &value, uint8_t *data, bool p_linear_color) { + switch (type) { + case ShaderLanguage::TYPE_BOOL: { + bool v = value; + + uint32_t *gui = (uint32_t *)data; + *gui = v ? 1 : 0; + } break; + case ShaderLanguage::TYPE_BVEC2: { + int v = value; + uint32_t *gui = (uint32_t *)data; + gui[0] = v & 1 ? 1 : 0; + gui[1] = v & 2 ? 1 : 0; + + } break; + case ShaderLanguage::TYPE_BVEC3: { + int v = value; + uint32_t *gui = (uint32_t *)data; + gui[0] = (v & 1) ? 1 : 0; + gui[1] = (v & 2) ? 1 : 0; + gui[2] = (v & 4) ? 1 : 0; + + } break; + case ShaderLanguage::TYPE_BVEC4: { + int v = value; + uint32_t *gui = (uint32_t *)data; + gui[0] = (v & 1) ? 1 : 0; + gui[1] = (v & 2) ? 1 : 0; + gui[2] = (v & 4) ? 1 : 0; + gui[3] = (v & 8) ? 1 : 0; + + } break; + case ShaderLanguage::TYPE_INT: { + int v = value; + int32_t *gui = (int32_t *)data; + gui[0] = v; + + } break; + case ShaderLanguage::TYPE_IVEC2: { + Vector<int> iv = value; + int s = iv.size(); + int32_t *gui = (int32_t *)data; + + const int *r = iv.ptr(); + + for (int i = 0; i < 2; i++) { + if (i < s) { + gui[i] = r[i]; + } else { + gui[i] = 0; + } + } + + } break; + case ShaderLanguage::TYPE_IVEC3: { + Vector<int> iv = value; + int s = iv.size(); + int32_t *gui = (int32_t *)data; + + const int *r = iv.ptr(); + + for (int i = 0; i < 3; i++) { + if (i < s) { + gui[i] = r[i]; + } else { + gui[i] = 0; + } + } + } break; + case ShaderLanguage::TYPE_IVEC4: { + Vector<int> iv = value; + int s = iv.size(); + int32_t *gui = (int32_t *)data; + + const int *r = iv.ptr(); + + for (int i = 0; i < 4; i++) { + if (i < s) { + gui[i] = r[i]; + } else { + gui[i] = 0; + } + } + } break; + case ShaderLanguage::TYPE_UINT: { + int v = value; + uint32_t *gui = (uint32_t *)data; + gui[0] = v; + + } break; + case ShaderLanguage::TYPE_UVEC2: { + Vector<int> iv = value; + int s = iv.size(); + uint32_t *gui = (uint32_t *)data; + + const int *r = iv.ptr(); + + for (int i = 0; i < 2; i++) { + if (i < s) { + gui[i] = r[i]; + } else { + gui[i] = 0; + } + } + } break; + case ShaderLanguage::TYPE_UVEC3: { + Vector<int> iv = value; + int s = iv.size(); + uint32_t *gui = (uint32_t *)data; + + const int *r = iv.ptr(); + + for (int i = 0; i < 3; i++) { + if (i < s) { + gui[i] = r[i]; + } else { + gui[i] = 0; + } + } + + } break; + case ShaderLanguage::TYPE_UVEC4: { + Vector<int> iv = value; + int s = iv.size(); + uint32_t *gui = (uint32_t *)data; + + const int *r = iv.ptr(); + + for (int i = 0; i < 4; i++) { + if (i < s) { + gui[i] = r[i]; + } else { + gui[i] = 0; + } + } + } break; + case ShaderLanguage::TYPE_FLOAT: { + float v = value; + float *gui = (float *)data; + gui[0] = v; + + } break; + case ShaderLanguage::TYPE_VEC2: { + Vector2 v = value; + float *gui = (float *)data; + gui[0] = v.x; + gui[1] = v.y; + + } break; + case ShaderLanguage::TYPE_VEC3: { + Vector3 v = value; + float *gui = (float *)data; + gui[0] = v.x; + gui[1] = v.y; + gui[2] = v.z; + + } break; + case ShaderLanguage::TYPE_VEC4: { + float *gui = (float *)data; + + if (value.get_type() == Variant::COLOR) { + Color v = value; + + if (p_linear_color) { + v = v.to_linear(); + } + + gui[0] = v.r; + gui[1] = v.g; + gui[2] = v.b; + gui[3] = v.a; + } else if (value.get_type() == Variant::RECT2) { + Rect2 v = value; + + gui[0] = v.position.x; + gui[1] = v.position.y; + gui[2] = v.size.x; + gui[3] = v.size.y; + } else if (value.get_type() == Variant::QUAT) { + Quat v = value; + + gui[0] = v.x; + gui[1] = v.y; + gui[2] = v.z; + gui[3] = v.w; + } else { + Plane v = value; + + gui[0] = v.normal.x; + gui[1] = v.normal.y; + gui[2] = v.normal.z; + gui[3] = v.d; + } + } break; + case ShaderLanguage::TYPE_MAT2: { + Transform2D v = value; + float *gui = (float *)data; + + //in std140 members of mat2 are treated as vec4s + gui[0] = v.elements[0][0]; + gui[1] = v.elements[0][1]; + gui[2] = 0; + gui[3] = 0; + gui[4] = v.elements[1][0]; + gui[5] = v.elements[1][1]; + gui[6] = 0; + gui[7] = 0; + } break; + case ShaderLanguage::TYPE_MAT3: { + Basis v = value; + float *gui = (float *)data; + + gui[0] = v.elements[0][0]; + gui[1] = v.elements[1][0]; + gui[2] = v.elements[2][0]; + gui[3] = 0; + gui[4] = v.elements[0][1]; + gui[5] = v.elements[1][1]; + gui[6] = v.elements[2][1]; + gui[7] = 0; + gui[8] = v.elements[0][2]; + gui[9] = v.elements[1][2]; + gui[10] = v.elements[2][2]; + gui[11] = 0; + } break; + case ShaderLanguage::TYPE_MAT4: { + Transform v = value; + float *gui = (float *)data; + + gui[0] = v.basis.elements[0][0]; + gui[1] = v.basis.elements[1][0]; + gui[2] = v.basis.elements[2][0]; + gui[3] = 0; + gui[4] = v.basis.elements[0][1]; + gui[5] = v.basis.elements[1][1]; + gui[6] = v.basis.elements[2][1]; + gui[7] = 0; + gui[8] = v.basis.elements[0][2]; + gui[9] = v.basis.elements[1][2]; + gui[10] = v.basis.elements[2][2]; + gui[11] = 0; + gui[12] = v.origin.x; + gui[13] = v.origin.y; + gui[14] = v.origin.z; + gui[15] = 1; + } break; + default: { + } + } +} + +_FORCE_INLINE_ static void _fill_std140_ubo_value(ShaderLanguage::DataType type, const Vector<ShaderLanguage::ConstantNode::Value> &value, uint8_t *data) { + switch (type) { + case ShaderLanguage::TYPE_BOOL: { + uint32_t *gui = (uint32_t *)data; + *gui = value[0].boolean ? 1 : 0; + } break; + case ShaderLanguage::TYPE_BVEC2: { + uint32_t *gui = (uint32_t *)data; + gui[0] = value[0].boolean ? 1 : 0; + gui[1] = value[1].boolean ? 1 : 0; + + } break; + case ShaderLanguage::TYPE_BVEC3: { + uint32_t *gui = (uint32_t *)data; + gui[0] = value[0].boolean ? 1 : 0; + gui[1] = value[1].boolean ? 1 : 0; + gui[2] = value[2].boolean ? 1 : 0; + + } break; + case ShaderLanguage::TYPE_BVEC4: { + uint32_t *gui = (uint32_t *)data; + gui[0] = value[0].boolean ? 1 : 0; + gui[1] = value[1].boolean ? 1 : 0; + gui[2] = value[2].boolean ? 1 : 0; + gui[3] = value[3].boolean ? 1 : 0; + + } break; + case ShaderLanguage::TYPE_INT: { + int32_t *gui = (int32_t *)data; + gui[0] = value[0].sint; + + } break; + case ShaderLanguage::TYPE_IVEC2: { + int32_t *gui = (int32_t *)data; + + for (int i = 0; i < 2; i++) { + gui[i] = value[i].sint; + } + + } break; + case ShaderLanguage::TYPE_IVEC3: { + int32_t *gui = (int32_t *)data; + + for (int i = 0; i < 3; i++) { + gui[i] = value[i].sint; + } + + } break; + case ShaderLanguage::TYPE_IVEC4: { + int32_t *gui = (int32_t *)data; + + for (int i = 0; i < 4; i++) { + gui[i] = value[i].sint; + } + + } break; + case ShaderLanguage::TYPE_UINT: { + uint32_t *gui = (uint32_t *)data; + gui[0] = value[0].uint; + + } break; + case ShaderLanguage::TYPE_UVEC2: { + int32_t *gui = (int32_t *)data; + + for (int i = 0; i < 2; i++) { + gui[i] = value[i].uint; + } + } break; + case ShaderLanguage::TYPE_UVEC3: { + int32_t *gui = (int32_t *)data; + + for (int i = 0; i < 3; i++) { + gui[i] = value[i].uint; + } + + } break; + case ShaderLanguage::TYPE_UVEC4: { + int32_t *gui = (int32_t *)data; + + for (int i = 0; i < 4; i++) { + gui[i] = value[i].uint; + } + } break; + case ShaderLanguage::TYPE_FLOAT: { + float *gui = (float *)data; + gui[0] = value[0].real; + + } break; + case ShaderLanguage::TYPE_VEC2: { + float *gui = (float *)data; + + for (int i = 0; i < 2; i++) { + gui[i] = value[i].real; + } + + } break; + case ShaderLanguage::TYPE_VEC3: { + float *gui = (float *)data; + + for (int i = 0; i < 3; i++) { + gui[i] = value[i].real; + } + + } break; + case ShaderLanguage::TYPE_VEC4: { + float *gui = (float *)data; + + for (int i = 0; i < 4; i++) { + gui[i] = value[i].real; + } + } break; + case ShaderLanguage::TYPE_MAT2: { + float *gui = (float *)data; + + //in std140 members of mat2 are treated as vec4s + gui[0] = value[0].real; + gui[1] = value[1].real; + gui[2] = 0; + gui[3] = 0; + gui[4] = value[2].real; + gui[5] = value[3].real; + gui[6] = 0; + gui[7] = 0; + } break; + case ShaderLanguage::TYPE_MAT3: { + float *gui = (float *)data; + + gui[0] = value[0].real; + gui[1] = value[1].real; + gui[2] = value[2].real; + gui[3] = 0; + gui[4] = value[3].real; + gui[5] = value[4].real; + gui[6] = value[5].real; + gui[7] = 0; + gui[8] = value[6].real; + gui[9] = value[7].real; + gui[10] = value[8].real; + gui[11] = 0; + } break; + case ShaderLanguage::TYPE_MAT4: { + float *gui = (float *)data; + + for (int i = 0; i < 16; i++) { + gui[i] = value[i].real; + } + } break; + default: { + } + } +} + +_FORCE_INLINE_ static void _fill_std140_ubo_empty(ShaderLanguage::DataType type, uint8_t *data) { + switch (type) { + case ShaderLanguage::TYPE_BOOL: + case ShaderLanguage::TYPE_INT: + case ShaderLanguage::TYPE_UINT: + case ShaderLanguage::TYPE_FLOAT: { + zeromem(data, 4); + } break; + case ShaderLanguage::TYPE_BVEC2: + case ShaderLanguage::TYPE_IVEC2: + case ShaderLanguage::TYPE_UVEC2: + case ShaderLanguage::TYPE_VEC2: { + zeromem(data, 8); + } break; + case ShaderLanguage::TYPE_BVEC3: + case ShaderLanguage::TYPE_IVEC3: + case ShaderLanguage::TYPE_UVEC3: + case ShaderLanguage::TYPE_VEC3: + case ShaderLanguage::TYPE_BVEC4: + case ShaderLanguage::TYPE_IVEC4: + case ShaderLanguage::TYPE_UVEC4: + case ShaderLanguage::TYPE_VEC4: { + zeromem(data, 16); + } break; + case ShaderLanguage::TYPE_MAT2: { + zeromem(data, 32); + } break; + case ShaderLanguage::TYPE_MAT3: { + zeromem(data, 48); + } break; + case ShaderLanguage::TYPE_MAT4: { + zeromem(data, 64); + } break; + + default: { + } + } +} + +void RendererStorageRD::MaterialData::update_uniform_buffer(const Map<StringName, ShaderLanguage::ShaderNode::Uniform> &p_uniforms, const uint32_t *p_uniform_offsets, const Map<StringName, Variant> &p_parameters, uint8_t *p_buffer, uint32_t p_buffer_size, bool p_use_linear_color) { + bool uses_global_buffer = false; + + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = p_uniforms.front(); E; E = E->next()) { + if (E->get().order < 0) { + continue; // texture, does not go here + } + + if (E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; //instance uniforms don't appear in the bufferr + } + + if (E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL) { + //this is a global variable, get the index to it + RendererStorageRD *rs = base_singleton; + + GlobalVariables::Variable *gv = rs->global_variables.variables.getptr(E->key()); + uint32_t index = 0; + if (gv) { + index = gv->buffer_index; + } else { + WARN_PRINT("Shader uses global uniform '" + E->key() + "', but it was removed at some point. Material will not display correctly."); + } + + uint32_t offset = p_uniform_offsets[E->get().order]; + uint32_t *intptr = (uint32_t *)&p_buffer[offset]; + *intptr = index; + uses_global_buffer = true; + continue; + } + + //regular uniform + uint32_t offset = p_uniform_offsets[E->get().order]; +#ifdef DEBUG_ENABLED + uint32_t size = ShaderLanguage::get_type_size(E->get().type); + ERR_CONTINUE(offset + size > p_buffer_size); +#endif + uint8_t *data = &p_buffer[offset]; + const Map<StringName, Variant>::Element *V = p_parameters.find(E->key()); + + if (V) { + //user provided + _fill_std140_variant_ubo_value(E->get().type, V->get(), data, p_use_linear_color); + + } else if (E->get().default_value.size()) { + //default value + _fill_std140_ubo_value(E->get().type, E->get().default_value, data); + //value=E->get().default_value; + } else { + //zero because it was not provided + if (E->get().type == ShaderLanguage::TYPE_VEC4 && E->get().hint == ShaderLanguage::ShaderNode::Uniform::HINT_COLOR) { + //colors must be set as black, with alpha as 1.0 + _fill_std140_variant_ubo_value(E->get().type, Color(0, 0, 0, 1), data, p_use_linear_color); + } else { + //else just zero it out + _fill_std140_ubo_empty(E->get().type, data); + } + } + } + + if (uses_global_buffer != (global_buffer_E != nullptr)) { + RendererStorageRD *rs = base_singleton; + if (uses_global_buffer) { + global_buffer_E = rs->global_variables.materials_using_buffer.push_back(self); + } else { + rs->global_variables.materials_using_buffer.erase(global_buffer_E); + global_buffer_E = nullptr; + } + } +} + +RendererStorageRD::MaterialData::~MaterialData() { + if (global_buffer_E) { + //unregister global buffers + RendererStorageRD *rs = base_singleton; + rs->global_variables.materials_using_buffer.erase(global_buffer_E); + } + + if (global_texture_E) { + //unregister global textures + RendererStorageRD *rs = base_singleton; + + for (Map<StringName, uint64_t>::Element *E = used_global_textures.front(); E; E = E->next()) { + GlobalVariables::Variable *v = rs->global_variables.variables.getptr(E->key()); + if (v) { + v->texture_materials.erase(self); + } + } + //unregister material from those using global textures + rs->global_variables.materials_using_texture.erase(global_texture_E); + } +} + +void RendererStorageRD::MaterialData::update_textures(const Map<StringName, Variant> &p_parameters, const Map<StringName, RID> &p_default_textures, const Vector<ShaderCompilerRD::GeneratedCode::Texture> &p_texture_uniforms, RID *p_textures, bool p_use_linear_color) { + RendererStorageRD *singleton = (RendererStorageRD *)RendererStorage::base_singleton; +#ifdef TOOLS_ENABLED + Texture *roughness_detect_texture = nullptr; + RS::TextureDetectRoughnessChannel roughness_channel = RS::TEXTURE_DETECT_ROUGHNESS_R; + Texture *normal_detect_texture = nullptr; +#endif + + bool uses_global_textures = false; + global_textures_pass++; + + for (int i = 0; i < p_texture_uniforms.size(); i++) { + const StringName &uniform_name = p_texture_uniforms[i].name; + + RID texture; + + if (p_texture_uniforms[i].global) { + RendererStorageRD *rs = base_singleton; + + uses_global_textures = true; + + GlobalVariables::Variable *v = rs->global_variables.variables.getptr(uniform_name); + if (v) { + if (v->buffer_index >= 0) { + WARN_PRINT("Shader uses global uniform texture '" + String(uniform_name) + "', but it changed type and is no longer a texture!."); + + } else { + Map<StringName, uint64_t>::Element *E = used_global_textures.find(uniform_name); + if (!E) { + E = used_global_textures.insert(uniform_name, global_textures_pass); + v->texture_materials.insert(self); + } else { + E->get() = global_textures_pass; + } + + texture = v->override.get_type() != Variant::NIL ? v->override : v->value; + } + + } else { + WARN_PRINT("Shader uses global uniform texture '" + String(uniform_name) + "', but it was removed at some point. Material will not display correctly."); + } + } else { + if (!texture.is_valid()) { + const Map<StringName, Variant>::Element *V = p_parameters.find(uniform_name); + if (V) { + texture = V->get(); + } + } + + if (!texture.is_valid()) { + const Map<StringName, RID>::Element *W = p_default_textures.find(uniform_name); + if (W) { + texture = W->get(); + } + } + } + + RID rd_texture; + + if (texture.is_null()) { + //check default usage + switch (p_texture_uniforms[i].hint) { + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK: + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK_ALBEDO: { + rd_texture = singleton->texture_rd_get_default(DEFAULT_RD_TEXTURE_BLACK); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_NONE: { + rd_texture = singleton->texture_rd_get_default(DEFAULT_RD_TEXTURE_NORMAL); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_ANISO: { + rd_texture = singleton->texture_rd_get_default(DEFAULT_RD_TEXTURE_ANISO); + } break; + default: { + rd_texture = singleton->texture_rd_get_default(DEFAULT_RD_TEXTURE_WHITE); + } break; + } + } else { + bool srgb = p_use_linear_color && (p_texture_uniforms[i].hint == ShaderLanguage::ShaderNode::Uniform::HINT_ALBEDO || p_texture_uniforms[i].hint == ShaderLanguage::ShaderNode::Uniform::HINT_BLACK_ALBEDO); + + Texture *tex = singleton->texture_owner.getornull(texture); + + if (tex) { + rd_texture = (srgb && tex->rd_texture_srgb.is_valid()) ? tex->rd_texture_srgb : tex->rd_texture; +#ifdef TOOLS_ENABLED + if (tex->detect_3d_callback && p_use_linear_color) { + tex->detect_3d_callback(tex->detect_3d_callback_ud); + } + if (tex->detect_normal_callback && (p_texture_uniforms[i].hint == ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL || p_texture_uniforms[i].hint == ShaderLanguage::ShaderNode::Uniform::HINT_ROUGHNESS_NORMAL)) { + if (p_texture_uniforms[i].hint == ShaderLanguage::ShaderNode::Uniform::HINT_ROUGHNESS_NORMAL) { + normal_detect_texture = tex; + } + tex->detect_normal_callback(tex->detect_normal_callback_ud); + } + if (tex->detect_roughness_callback && (p_texture_uniforms[i].hint >= ShaderLanguage::ShaderNode::Uniform::HINT_ROUGHNESS_R || p_texture_uniforms[i].hint <= ShaderLanguage::ShaderNode::Uniform::HINT_ROUGHNESS_GRAY)) { + //find the normal texture + roughness_detect_texture = tex; + roughness_channel = RS::TextureDetectRoughnessChannel(p_texture_uniforms[i].hint - ShaderLanguage::ShaderNode::Uniform::HINT_ROUGHNESS_R); + } + +#endif + } + + if (rd_texture.is_null()) { + //wtf + rd_texture = singleton->texture_rd_get_default(DEFAULT_RD_TEXTURE_WHITE); + } + } + + p_textures[i] = rd_texture; + } +#ifdef TOOLS_ENABLED + if (roughness_detect_texture && normal_detect_texture && normal_detect_texture->path != String()) { + roughness_detect_texture->detect_roughness_callback(roughness_detect_texture->detect_roughness_callback_ud, normal_detect_texture->path, roughness_channel); + } +#endif + { + //for textures no longer used, unregister them + List<Map<StringName, uint64_t>::Element *> to_delete; + RendererStorageRD *rs = base_singleton; + + for (Map<StringName, uint64_t>::Element *E = used_global_textures.front(); E; E = E->next()) { + if (E->get() != global_textures_pass) { + to_delete.push_back(E); + + GlobalVariables::Variable *v = rs->global_variables.variables.getptr(E->key()); + if (v) { + v->texture_materials.erase(self); + } + } + } + + while (to_delete.front()) { + used_global_textures.erase(to_delete.front()->get()); + to_delete.pop_front(); + } + //handle registering/unregistering global textures + if (uses_global_textures != (global_texture_E != nullptr)) { + if (uses_global_textures) { + global_texture_E = rs->global_variables.materials_using_texture.push_back(self); + } else { + rs->global_variables.materials_using_texture.erase(global_texture_E); + global_texture_E = nullptr; + } + } + } +} + +void RendererStorageRD::material_force_update_textures(RID p_material, ShaderType p_shader_type) { + Material *material = material_owner.getornull(p_material); + if (material->shader_type != p_shader_type) { + return; + } + if (material->data) { + material->data->update_parameters(material->params, false, true); + } +} + +void RendererStorageRD::_update_queued_materials() { + Material *material = material_update_list; + while (material) { + Material *next = material->update_next; + + if (material->data) { + material->data->update_parameters(material->params, material->uniform_dirty, material->texture_dirty); + } + material->update_requested = false; + material->texture_dirty = false; + material->uniform_dirty = false; + material->update_next = nullptr; + material = next; + } + material_update_list = nullptr; +} + +/* MESH API */ + +RID RendererStorageRD::mesh_allocate() { + return mesh_owner.allocate_rid(); +} +void RendererStorageRD::mesh_initialize(RID p_rid) { + mesh_owner.initialize_rid(p_rid, Mesh()); +} + +void RendererStorageRD::mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count) { + ERR_FAIL_COND(p_blend_shape_count < 0); + + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + + ERR_FAIL_COND(mesh->surface_count > 0); //surfaces already exist + + mesh->blend_shape_count = p_blend_shape_count; +} + +/// Returns stride +void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + +#ifdef DEBUG_ENABLED + //do a validation, to catch errors first + { + uint32_t stride = 0; + uint32_t attrib_stride = 0; + uint32_t skin_stride = 0; + + for (int i = 0; i < RS::ARRAY_WEIGHTS; i++) { + if ((p_surface.format & (1 << i))) { + switch (i) { + case RS::ARRAY_VERTEX: { + if (p_surface.format & RS::ARRAY_FLAG_USE_2D_VERTICES) { + stride += sizeof(float) * 2; + } else { + stride += sizeof(float) * 3; + } + + } break; + case RS::ARRAY_NORMAL: { + stride += sizeof(int32_t); + + } break; + case RS::ARRAY_TANGENT: { + stride += sizeof(int32_t); + + } break; + case RS::ARRAY_COLOR: { + attrib_stride += sizeof(int16_t) * 4; + } break; + case RS::ARRAY_TEX_UV: { + attrib_stride += sizeof(float) * 2; + + } break; + case RS::ARRAY_TEX_UV2: { + attrib_stride += sizeof(float) * 2; + + } break; + case RS::ARRAY_CUSTOM0: + case RS::ARRAY_CUSTOM1: + case RS::ARRAY_CUSTOM2: + case RS::ARRAY_CUSTOM3: { + int idx = i - RS::ARRAY_CUSTOM0; + uint32_t fmt_shift[RS::ARRAY_CUSTOM_COUNT] = { RS::ARRAY_FORMAT_CUSTOM0_SHIFT, RS::ARRAY_FORMAT_CUSTOM1_SHIFT, RS::ARRAY_FORMAT_CUSTOM2_SHIFT, RS::ARRAY_FORMAT_CUSTOM3_SHIFT }; + uint32_t fmt = (p_surface.format >> fmt_shift[idx]) & RS::ARRAY_FORMAT_CUSTOM_MASK; + uint32_t fmtsize[RS::ARRAY_CUSTOM_MAX] = { 4, 4, 4, 8, 4, 8, 12, 16 }; + attrib_stride += fmtsize[fmt]; + + } break; + case RS::ARRAY_WEIGHTS: + case RS::ARRAY_BONES: { + //uses a separate array + bool use_8 = p_surface.format & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS; + skin_stride += sizeof(int16_t) * (use_8 ? 16 : 8); + } break; + } + } + } + + int expected_size = stride * p_surface.vertex_count; + ERR_FAIL_COND_MSG(expected_size != p_surface.vertex_data.size(), "Size of vertex data provided (" + itos(p_surface.vertex_data.size()) + ") does not match expected (" + itos(expected_size) + ")"); + + int bs_expected_size = expected_size * mesh->blend_shape_count; + + ERR_FAIL_COND_MSG(bs_expected_size != p_surface.blend_shape_data.size(), "Size of blend shape data provided (" + itos(p_surface.blend_shape_data.size()) + ") does not match expected (" + itos(bs_expected_size) + ")"); + + int expected_attrib_size = attrib_stride * p_surface.vertex_count; + ERR_FAIL_COND_MSG(expected_attrib_size != p_surface.attribute_data.size(), "Size of attribute data provided (" + itos(p_surface.attribute_data.size()) + ") does not match expected (" + itos(expected_attrib_size) + ")"); + + if ((p_surface.format & RS::ARRAY_FORMAT_WEIGHTS) && (p_surface.format & RS::ARRAY_FORMAT_BONES)) { + expected_size = skin_stride * p_surface.vertex_count; + ERR_FAIL_COND_MSG(expected_size != p_surface.skin_data.size(), "Size of skin data provided (" + itos(p_surface.skin_data.size()) + ") does not match expected (" + itos(expected_size) + ")"); + } + } + +#endif + + Mesh::Surface *s = memnew(Mesh::Surface); + + s->format = p_surface.format; + s->primitive = p_surface.primitive; + + bool use_as_storage = (p_surface.skin_data.size() || mesh->blend_shape_count > 0); + + s->vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.vertex_data.size(), p_surface.vertex_data, use_as_storage); + s->vertex_buffer_size = p_surface.vertex_data.size(); + + if (p_surface.attribute_data.size()) { + s->attribute_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.attribute_data.size(), p_surface.attribute_data); + } + if (p_surface.skin_data.size()) { + s->skin_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.skin_data.size(), p_surface.skin_data, use_as_storage); + s->skin_buffer_size = p_surface.skin_data.size(); + } + + s->vertex_count = p_surface.vertex_count; + + if (p_surface.format & RS::ARRAY_FORMAT_BONES) { + mesh->has_bone_weights = true; + } + + if (p_surface.index_count) { + bool is_index_16 = p_surface.vertex_count <= 65536; + + s->index_buffer = RD::get_singleton()->index_buffer_create(p_surface.index_count, is_index_16 ? RD::INDEX_BUFFER_FORMAT_UINT16 : RD::INDEX_BUFFER_FORMAT_UINT32, p_surface.index_data, false); + s->index_count = p_surface.index_count; + s->index_array = RD::get_singleton()->index_array_create(s->index_buffer, 0, s->index_count); + if (p_surface.lods.size()) { + s->lods = memnew_arr(Mesh::Surface::LOD, p_surface.lods.size()); + s->lod_count = p_surface.lods.size(); + + for (int i = 0; i < p_surface.lods.size(); i++) { + uint32_t indices = p_surface.lods[i].index_data.size() / (is_index_16 ? 2 : 4); + s->lods[i].index_buffer = RD::get_singleton()->index_buffer_create(indices, is_index_16 ? RD::INDEX_BUFFER_FORMAT_UINT16 : RD::INDEX_BUFFER_FORMAT_UINT32, p_surface.lods[i].index_data); + s->lods[i].index_array = RD::get_singleton()->index_array_create(s->lods[i].index_buffer, 0, indices); + s->lods[i].edge_length = p_surface.lods[i].edge_length; + } + } + } + + s->aabb = p_surface.aabb; + s->bone_aabbs = p_surface.bone_aabbs; //only really useful for returning them. + + if (mesh->blend_shape_count > 0) { + s->blend_shape_buffer = RD::get_singleton()->storage_buffer_create(p_surface.blend_shape_data.size(), p_surface.blend_shape_data); + } + + if (use_as_storage) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(s->vertex_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + if (s->skin_buffer.is_valid()) { + u.ids.push_back(s->skin_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + if (s->blend_shape_buffer.is_valid()) { + u.ids.push_back(s->blend_shape_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + + s->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SURFACE); + } + + if (mesh->surface_count == 0) { + mesh->bone_aabbs = p_surface.bone_aabbs; + mesh->aabb = p_surface.aabb; + } else { + if (mesh->bone_aabbs.size() < p_surface.bone_aabbs.size()) { + // ArrayMesh::_surface_set_data only allocates bone_aabbs up to max_bone + // Each surface may affect different numbers of bones. + mesh->bone_aabbs.resize(p_surface.bone_aabbs.size()); + } + for (int i = 0; i < p_surface.bone_aabbs.size(); i++) { + mesh->bone_aabbs.write[i].merge_with(p_surface.bone_aabbs[i]); + } + mesh->aabb.merge_with(p_surface.aabb); + } + + s->material = p_surface.material; + + mesh->surfaces = (Mesh::Surface **)memrealloc(mesh->surfaces, sizeof(Mesh::Surface *) * (mesh->surface_count + 1)); + mesh->surfaces[mesh->surface_count] = s; + mesh->surface_count++; + + for (List<MeshInstance *>::Element *E = mesh->instances.front(); E; E = E->next()) { + //update instances + MeshInstance *mi = E->get(); + _mesh_instance_add_surface(mi, mesh, mesh->surface_count - 1); + } + + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + + for (Set<Mesh *>::Element *E = mesh->shadow_owners.front(); E; E = E->next()) { + Mesh *shadow_owner = E->get(); + shadow_owner->shadow_mesh = RID(); + shadow_owner->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + } + + mesh->material_cache.clear(); +} + +int RendererStorageRD::mesh_get_blend_shape_count(RID p_mesh) const { + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, -1); + return mesh->blend_shape_count; +} + +void RendererStorageRD::mesh_set_blend_shape_mode(RID p_mesh, RS::BlendShapeMode p_mode) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_INDEX((int)p_mode, 2); + + mesh->blend_shape_mode = p_mode; +} + +RS::BlendShapeMode RendererStorageRD::mesh_get_blend_shape_mode(RID p_mesh) const { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, RS::BLEND_SHAPE_MODE_NORMALIZED); + return mesh->blend_shape_mode; +} + +void RendererStorageRD::mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const Vector<uint8_t> &p_data) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); + ERR_FAIL_COND(p_data.size() == 0); + uint64_t data_size = p_data.size(); + const uint8_t *r = p_data.ptr(); + + RD::get_singleton()->buffer_update(mesh->surfaces[p_surface]->vertex_buffer, p_offset, data_size, r); +} + +void RendererStorageRD::mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); + mesh->surfaces[p_surface]->material = p_material; + + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MATERIAL); + mesh->material_cache.clear(); +} + +RID RendererStorageRD::mesh_surface_get_material(RID p_mesh, int p_surface) const { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, RID()); + ERR_FAIL_UNSIGNED_INDEX_V((uint32_t)p_surface, mesh->surface_count, RID()); + + return mesh->surfaces[p_surface]->material; +} + +RS::SurfaceData RendererStorageRD::mesh_get_surface(RID p_mesh, int p_surface) const { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, RS::SurfaceData()); + ERR_FAIL_UNSIGNED_INDEX_V((uint32_t)p_surface, mesh->surface_count, RS::SurfaceData()); + + Mesh::Surface &s = *mesh->surfaces[p_surface]; + + RS::SurfaceData sd; + sd.format = s.format; + sd.vertex_data = RD::get_singleton()->buffer_get_data(s.vertex_buffer); + if (s.attribute_buffer.is_valid()) { + sd.attribute_data = RD::get_singleton()->buffer_get_data(s.attribute_buffer); + } + if (s.skin_buffer.is_valid()) { + sd.skin_data = RD::get_singleton()->buffer_get_data(s.skin_buffer); + } + sd.vertex_count = s.vertex_count; + sd.index_count = s.index_count; + sd.primitive = s.primitive; + + if (sd.index_count) { + sd.index_data = RD::get_singleton()->buffer_get_data(s.index_buffer); + } + sd.aabb = s.aabb; + for (uint32_t i = 0; i < s.lod_count; i++) { + RS::SurfaceData::LOD lod; + lod.edge_length = s.lods[i].edge_length; + lod.index_data = RD::get_singleton()->buffer_get_data(s.lods[i].index_buffer); + sd.lods.push_back(lod); + } + + sd.bone_aabbs = s.bone_aabbs; + + if (s.blend_shape_buffer.is_valid()) { + sd.blend_shape_data = RD::get_singleton()->buffer_get_data(s.blend_shape_buffer); + } + + return sd; +} + +int RendererStorageRD::mesh_get_surface_count(RID p_mesh) const { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, 0); + return mesh->surface_count; +} + +void RendererStorageRD::mesh_set_custom_aabb(RID p_mesh, const AABB &p_aabb) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + mesh->custom_aabb = p_aabb; +} + +AABB RendererStorageRD::mesh_get_custom_aabb(RID p_mesh) const { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, AABB()); + return mesh->custom_aabb; +} + +AABB RendererStorageRD::mesh_get_aabb(RID p_mesh, RID p_skeleton) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, AABB()); + + if (mesh->custom_aabb != AABB()) { + return mesh->custom_aabb; + } + + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + + if (!skeleton || skeleton->size == 0) { + return mesh->aabb; + } + + AABB aabb; + + for (uint32_t i = 0; i < mesh->surface_count; i++) { + AABB laabb; + if ((mesh->surfaces[i]->format & RS::ARRAY_FORMAT_BONES) && mesh->surfaces[i]->bone_aabbs.size()) { + int bs = mesh->surfaces[i]->bone_aabbs.size(); + const AABB *skbones = mesh->surfaces[i]->bone_aabbs.ptr(); + + int sbs = skeleton->size; + ERR_CONTINUE(bs > sbs); + const float *baseptr = skeleton->data.ptr(); + + bool first = true; + + if (skeleton->use_2d) { + for (int j = 0; j < bs; j++) { + if (skbones[0].size == Vector3()) { + continue; //bone is unused + } + + const float *dataptr = baseptr + j * 8; + + Transform mtx; + + mtx.basis.elements[0].x = dataptr[0]; + mtx.basis.elements[1].x = dataptr[1]; + mtx.origin.x = dataptr[3]; + + mtx.basis.elements[0].y = dataptr[4]; + mtx.basis.elements[1].y = dataptr[5]; + mtx.origin.y = dataptr[7]; + + AABB baabb = mtx.xform(skbones[j]); + + if (first) { + laabb = baabb; + first = false; + } else { + laabb.merge_with(baabb); + } + } + } else { + for (int j = 0; j < bs; j++) { + if (skbones[0].size == Vector3()) { + continue; //bone is unused + } + + const float *dataptr = baseptr + j * 12; + + Transform mtx; + + mtx.basis.elements[0][0] = dataptr[0]; + mtx.basis.elements[0][1] = dataptr[1]; + mtx.basis.elements[0][2] = dataptr[2]; + mtx.origin.x = dataptr[3]; + mtx.basis.elements[1][0] = dataptr[4]; + mtx.basis.elements[1][1] = dataptr[5]; + mtx.basis.elements[1][2] = dataptr[6]; + mtx.origin.y = dataptr[7]; + mtx.basis.elements[2][0] = dataptr[8]; + mtx.basis.elements[2][1] = dataptr[9]; + mtx.basis.elements[2][2] = dataptr[10]; + mtx.origin.z = dataptr[11]; + + AABB baabb = mtx.xform(skbones[j]); + if (first) { + laabb = baabb; + first = false; + } else { + laabb.merge_with(baabb); + } + } + } + + if (laabb.size == Vector3()) { + laabb = mesh->surfaces[i]->aabb; + } + } else { + laabb = mesh->surfaces[i]->aabb; + } + + if (i == 0) { + aabb = laabb; + } else { + aabb.merge_with(laabb); + } + } + + return aabb; +} + +void RendererStorageRD::mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + + Mesh *shadow_mesh = mesh_owner.getornull(mesh->shadow_mesh); + if (shadow_mesh) { + shadow_mesh->shadow_owners.erase(mesh); + } + mesh->shadow_mesh = p_shadow_mesh; + + shadow_mesh = mesh_owner.getornull(mesh->shadow_mesh); + + if (shadow_mesh) { + shadow_mesh->shadow_owners.insert(mesh); + } + + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); +} + +void RendererStorageRD::mesh_clear(RID p_mesh) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + for (uint32_t i = 0; i < mesh->surface_count; i++) { + Mesh::Surface &s = *mesh->surfaces[i]; + RD::get_singleton()->free(s.vertex_buffer); //clears arrays as dependency automatically, including all versions + if (s.attribute_buffer.is_valid()) { + RD::get_singleton()->free(s.attribute_buffer); + } + if (s.skin_buffer.is_valid()) { + RD::get_singleton()->free(s.skin_buffer); + } + if (s.versions) { + memfree(s.versions); //reallocs, so free with memfree. + } + + if (s.index_buffer.is_valid()) { + RD::get_singleton()->free(s.index_buffer); + } + + if (s.lod_count) { + for (uint32_t j = 0; j < s.lod_count; j++) { + RD::get_singleton()->free(s.lods[j].index_buffer); + } + memdelete_arr(s.lods); + } + + if (s.blend_shape_buffer.is_valid()) { + RD::get_singleton()->free(s.blend_shape_buffer); + } + + memdelete(mesh->surfaces[i]); + } + if (mesh->surfaces) { + memfree(mesh->surfaces); + } + + mesh->surfaces = nullptr; + mesh->surface_count = 0; + mesh->material_cache.clear(); + //clear instance data + for (List<MeshInstance *>::Element *E = mesh->instances.front(); E; E = E->next()) { + MeshInstance *mi = E->get(); + _mesh_instance_clear(mi); + } + mesh->has_bone_weights = false; + mesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + + for (Set<Mesh *>::Element *E = mesh->shadow_owners.front(); E; E = E->next()) { + Mesh *shadow_owner = E->get(); + shadow_owner->shadow_mesh = RID(); + shadow_owner->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + } +} + +bool RendererStorageRD::mesh_needs_instance(RID p_mesh, bool p_has_skeleton) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, false); + + return mesh->blend_shape_count > 0 || (mesh->has_bone_weights && p_has_skeleton); +} + +/* MESH INSTANCE */ + +RID RendererStorageRD::mesh_instance_create(RID p_base) { + Mesh *mesh = mesh_owner.getornull(p_base); + ERR_FAIL_COND_V(!mesh, RID()); + + MeshInstance *mi = memnew(MeshInstance); + + mi->mesh = mesh; + + for (uint32_t i = 0; i < mesh->surface_count; i++) { + _mesh_instance_add_surface(mi, mesh, i); + } + + mi->I = mesh->instances.push_back(mi); + + mi->dirty = true; + + return mesh_instance_owner.make_rid(mi); +} +void RendererStorageRD::mesh_instance_set_skeleton(RID p_mesh_instance, RID p_skeleton) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + if (mi->skeleton == p_skeleton) { + return; + } + mi->skeleton = p_skeleton; + mi->skeleton_version = 0; + mi->dirty = true; +} + +void RendererStorageRD::mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int p_shape, float p_weight) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + ERR_FAIL_COND(!mi); + ERR_FAIL_INDEX(p_shape, (int)mi->blend_weights.size()); + mi->blend_weights[p_shape] = p_weight; + mi->weights_dirty = true; + //will be eventually updated +} + +void RendererStorageRD::_mesh_instance_clear(MeshInstance *mi) { + for (uint32_t i = 0; i < mi->surfaces.size(); i++) { + if (mi->surfaces[i].vertex_buffer.is_valid()) { + RD::get_singleton()->free(mi->surfaces[i].vertex_buffer); + } + if (mi->surfaces[i].versions) { + for (uint32_t j = 0; j < mi->surfaces[i].version_count; j++) { + RD::get_singleton()->free(mi->surfaces[i].versions[j].vertex_array); + } + memfree(mi->surfaces[i].versions); + } + } + mi->surfaces.clear(); + + if (mi->blend_weights_buffer.is_valid()) { + RD::get_singleton()->free(mi->blend_weights_buffer); + } + mi->blend_weights.clear(); + mi->weights_dirty = false; + mi->skeleton_version = 0; +} + +void RendererStorageRD::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface) { + if (mesh->blend_shape_count > 0 && mi->blend_weights_buffer.is_null()) { + mi->blend_weights.resize(mesh->blend_shape_count); + for (uint32_t i = 0; i < mi->blend_weights.size(); i++) { + mi->blend_weights[i] = 0; + } + mi->blend_weights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * mi->blend_weights.size(), mi->blend_weights.to_byte_array()); + mi->weights_dirty = true; + } + + MeshInstance::Surface s; + if (mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) { + //surface warrants transform + s.vertex_buffer = RD::get_singleton()->vertex_buffer_create(mesh->surfaces[p_surface]->vertex_buffer_size, Vector<uint8_t>(), true); + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(s.vertex_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + if (mi->blend_weights_buffer.is_valid()) { + u.ids.push_back(mi->blend_weights_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + s.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_INSTANCE); + } + + mi->surfaces.push_back(s); + mi->dirty = true; +} + +void RendererStorageRD::mesh_instance_check_for_update(RID p_mesh_instance) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + + bool needs_update = mi->dirty; + + if (mi->weights_dirty && !mi->weight_update_list.in_list()) { + dirty_mesh_instance_weights.add(&mi->weight_update_list); + needs_update = true; + } + + if (mi->array_update_list.in_list()) { + return; + } + + if (!needs_update && mi->skeleton.is_valid()) { + Skeleton *sk = skeleton_owner.getornull(mi->skeleton); + if (sk && sk->version != mi->skeleton_version) { + needs_update = true; + } + } + + if (needs_update) { + dirty_mesh_instance_arrays.add(&mi->array_update_list); + } +} + +void RendererStorageRD::update_mesh_instances() { + while (dirty_mesh_instance_weights.first()) { + MeshInstance *mi = dirty_mesh_instance_weights.first()->self(); + + if (mi->blend_weights_buffer.is_valid()) { + RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr()); + } + dirty_mesh_instance_weights.remove(&mi->weight_update_list); + mi->weights_dirty = false; + } + if (dirty_mesh_instance_arrays.first() == nullptr) { + return; //nothing to do + } + + //process skeletons and blend shapes + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + while (dirty_mesh_instance_arrays.first()) { + MeshInstance *mi = dirty_mesh_instance_arrays.first()->self(); + + Skeleton *sk = skeleton_owner.getornull(mi->skeleton); + + for (uint32_t i = 0; i < mi->surfaces.size(); i++) { + if (mi->surfaces[i].uniform_set == RID() || mi->mesh->surfaces[i]->uniform_set == RID()) { + continue; + } + + bool array_is_2d = mi->mesh->surfaces[i]->format & RS::ARRAY_FLAG_USE_2D_VERTICES; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, skeleton_shader.pipeline[array_is_2d ? SkeletonShader::SHADER_MODE_2D : SkeletonShader::SHADER_MODE_3D]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mi->surfaces[i].uniform_set, SkeletonShader::UNIFORM_SET_INSTANCE); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mi->mesh->surfaces[i]->uniform_set, SkeletonShader::UNIFORM_SET_SURFACE); + if (sk && sk->uniform_set_mi.is_valid()) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sk->uniform_set_mi, SkeletonShader::UNIFORM_SET_SKELETON); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, skeleton_shader.default_skeleton_uniform_set, SkeletonShader::UNIFORM_SET_SKELETON); + } + + SkeletonShader::PushConstant push_constant; + + push_constant.has_normal = mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_NORMAL; + push_constant.has_tangent = mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_TANGENT; + push_constant.has_skeleton = sk != nullptr && sk->use_2d == array_is_2d && (mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_BONES); + push_constant.has_blend_shape = mi->mesh->blend_shape_count > 0; + + push_constant.vertex_count = mi->mesh->surfaces[i]->vertex_count; + push_constant.vertex_stride = (mi->mesh->surfaces[i]->vertex_buffer_size / mi->mesh->surfaces[i]->vertex_count) / 4; + push_constant.skin_stride = (mi->mesh->surfaces[i]->skin_buffer_size / mi->mesh->surfaces[i]->vertex_count) / 4; + push_constant.skin_weight_offset = (mi->mesh->surfaces[i]->format & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS) ? 4 : 2; + + push_constant.blend_shape_count = mi->mesh->blend_shape_count; + push_constant.normalized_blend_shapes = mi->mesh->blend_shape_mode == RS::BLEND_SHAPE_MODE_NORMALIZED; + push_constant.pad0 = 0; + push_constant.pad1 = 0; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant)); + + //dispatch without barrier, so all is done at the same time + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1); + } + + mi->dirty = false; + if (sk) { + mi->skeleton_version = sk->version; + } + dirty_mesh_instance_arrays.remove(&mi->array_update_list); + } + + RD::get_singleton()->compute_list_end(); +} + +void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis) { + Vector<RD::VertexAttribute> attributes; + Vector<RID> buffers; + + uint32_t stride = 0; + uint32_t attribute_stride = 0; + uint32_t skin_stride = 0; + + for (int i = 0; i < RS::ARRAY_INDEX; i++) { + RD::VertexAttribute vd; + RID buffer; + vd.location = i; + + if (!(s->format & (1 << i))) { + // Not supplied by surface, use default value + buffer = mesh_default_rd_buffers[i]; + vd.stride = 0; + switch (i) { + case RS::ARRAY_VERTEX: { + vd.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; + + } break; + case RS::ARRAY_NORMAL: { + vd.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; + } break; + case RS::ARRAY_TANGENT: { + vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + } break; + case RS::ARRAY_COLOR: { + vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + + } break; + case RS::ARRAY_TEX_UV: { + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + + } break; + case RS::ARRAY_TEX_UV2: { + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + } break; + case RS::ARRAY_CUSTOM0: + case RS::ARRAY_CUSTOM1: + case RS::ARRAY_CUSTOM2: + case RS::ARRAY_CUSTOM3: { + //assumed weights too + vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + } break; + case RS::ARRAY_BONES: { + //assumed weights too + vd.format = RD::DATA_FORMAT_R32G32B32A32_UINT; + } break; + case RS::ARRAY_WEIGHTS: { + //assumed weights too + vd.format = RD::DATA_FORMAT_R32G32B32A32_UINT; + } break; + } + } else { + //Supplied, use it + + vd.stride = 1; //mark that it needs a stride set (default uses 0) + + switch (i) { + case RS::ARRAY_VERTEX: { + vd.offset = stride; + + if (s->format & RS::ARRAY_FLAG_USE_2D_VERTICES) { + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + stride += sizeof(float) * 2; + } else { + vd.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; + stride += sizeof(float) * 3; + } + + if (mis) { + buffer = mis->vertex_buffer; + } else { + buffer = s->vertex_buffer; + } + + } break; + case RS::ARRAY_NORMAL: { + vd.offset = stride; + + vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; + + stride += sizeof(uint32_t); + if (mis) { + buffer = mis->vertex_buffer; + } else { + buffer = s->vertex_buffer; + } + } break; + case RS::ARRAY_TANGENT: { + vd.offset = stride; + + vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; + stride += sizeof(uint32_t); + if (mis) { + buffer = mis->vertex_buffer; + } else { + buffer = s->vertex_buffer; + } + } break; + case RS::ARRAY_COLOR: { + vd.offset = attribute_stride; + + vd.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + attribute_stride += sizeof(int16_t) * 4; + buffer = s->attribute_buffer; + } break; + case RS::ARRAY_TEX_UV: { + vd.offset = attribute_stride; + + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + attribute_stride += sizeof(float) * 2; + buffer = s->attribute_buffer; + + } break; + case RS::ARRAY_TEX_UV2: { + vd.offset = attribute_stride; + + vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; + attribute_stride += sizeof(float) * 2; + buffer = s->attribute_buffer; + } break; + case RS::ARRAY_CUSTOM0: + case RS::ARRAY_CUSTOM1: + case RS::ARRAY_CUSTOM2: + case RS::ARRAY_CUSTOM3: { + vd.offset = attribute_stride; + + int idx = i - RS::ARRAY_CUSTOM0; + uint32_t fmt_shift[RS::ARRAY_CUSTOM_COUNT] = { RS::ARRAY_FORMAT_CUSTOM0_SHIFT, RS::ARRAY_FORMAT_CUSTOM1_SHIFT, RS::ARRAY_FORMAT_CUSTOM2_SHIFT, RS::ARRAY_FORMAT_CUSTOM3_SHIFT }; + uint32_t fmt = (s->format >> fmt_shift[idx]) & RS::ARRAY_FORMAT_CUSTOM_MASK; + uint32_t fmtsize[RS::ARRAY_CUSTOM_MAX] = { 4, 4, 4, 8, 4, 8, 12, 16 }; + RD::DataFormat fmtrd[RS::ARRAY_CUSTOM_MAX] = { RD::DATA_FORMAT_R8G8B8A8_UNORM, RD::DATA_FORMAT_R8G8B8A8_SNORM, RD::DATA_FORMAT_R16G16_SFLOAT, RD::DATA_FORMAT_R16G16B16A16_SFLOAT, RD::DATA_FORMAT_R32_SFLOAT, RD::DATA_FORMAT_R32G32_SFLOAT, RD::DATA_FORMAT_R32G32B32_SFLOAT, RD::DATA_FORMAT_R32G32B32A32_SFLOAT }; + vd.format = fmtrd[fmt]; + attribute_stride += fmtsize[fmt]; + buffer = s->attribute_buffer; + } break; + case RS::ARRAY_BONES: { + vd.offset = skin_stride; + + vd.format = RD::DATA_FORMAT_R16G16B16A16_UINT; + skin_stride += sizeof(int16_t) * 4; + buffer = s->skin_buffer; + } break; + case RS::ARRAY_WEIGHTS: { + vd.offset = skin_stride; + + vd.format = RD::DATA_FORMAT_R16G16B16A16_UNORM; + skin_stride += sizeof(int16_t) * 4; + buffer = s->skin_buffer; + } break; + } + } + + if (!(p_input_mask & (1 << i))) { + continue; // Shader does not need this, skip it (but computing stride was important anyway) + } + + attributes.push_back(vd); + buffers.push_back(buffer); + } + + //update final stride + for (int i = 0; i < attributes.size(); i++) { + if (attributes[i].stride == 0) { + continue; //default location + } + int loc = attributes[i].location; + + if (loc < RS::ARRAY_COLOR) { + attributes.write[i].stride = stride; + } else if (loc < RS::ARRAY_BONES) { + attributes.write[i].stride = attribute_stride; + } else { + attributes.write[i].stride = skin_stride; + } + } + + v.input_mask = p_input_mask; + v.vertex_format = RD::get_singleton()->vertex_format_create(attributes); + v.vertex_array = RD::get_singleton()->vertex_array_create(s->vertex_count, v.vertex_format, buffers); +} + +////////////////// MULTIMESH + +RID RendererStorageRD::multimesh_allocate() { + return multimesh_owner.allocate_rid(); +} +void RendererStorageRD::multimesh_initialize(RID p_rid) { + multimesh_owner.initialize_rid(p_rid, MultiMesh()); +} + +void RendererStorageRD::multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors, bool p_use_custom_data) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + + if (multimesh->instances == p_instances && multimesh->xform_format == p_transform_format && multimesh->uses_colors == p_use_colors && multimesh->uses_custom_data == p_use_custom_data) { + return; + } + + if (multimesh->buffer.is_valid()) { + RD::get_singleton()->free(multimesh->buffer); + multimesh->buffer = RID(); + multimesh->uniform_set_3d = RID(); //cleared by dependency + } + + if (multimesh->data_cache_dirty_regions) { + memdelete_arr(multimesh->data_cache_dirty_regions); + multimesh->data_cache_dirty_regions = nullptr; + multimesh->data_cache_used_dirty_regions = 0; + } + + multimesh->instances = p_instances; + multimesh->xform_format = p_transform_format; + multimesh->uses_colors = p_use_colors; + multimesh->color_offset_cache = p_transform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12; + multimesh->uses_custom_data = p_use_custom_data; + multimesh->custom_data_offset_cache = multimesh->color_offset_cache + (p_use_colors ? 4 : 0); + multimesh->stride_cache = multimesh->custom_data_offset_cache + (p_use_custom_data ? 4 : 0); + multimesh->buffer_set = false; + + //print_line("allocate, elements: " + itos(p_instances) + " 2D: " + itos(p_transform_format == RS::MULTIMESH_TRANSFORM_2D) + " colors " + itos(multimesh->uses_colors) + " data " + itos(multimesh->uses_custom_data) + " stride " + itos(multimesh->stride_cache) + " total size " + itos(multimesh->stride_cache * multimesh->instances)); + multimesh->data_cache = Vector<float>(); + multimesh->aabb = AABB(); + multimesh->aabb_dirty = false; + multimesh->visible_instances = MIN(multimesh->visible_instances, multimesh->instances); + + if (multimesh->instances) { + multimesh->buffer = RD::get_singleton()->storage_buffer_create(multimesh->instances * multimesh->stride_cache * 4); + } + + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_MULTIMESH); +} + +int RendererStorageRD::multimesh_get_instance_count(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, 0); + return multimesh->instances; +} + +void RendererStorageRD::multimesh_set_mesh(RID p_multimesh, RID p_mesh) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + if (multimesh->mesh == p_mesh) { + return; + } + multimesh->mesh = p_mesh; + + if (multimesh->instances == 0) { + return; + } + + if (multimesh->data_cache.size()) { + //we have a data cache, just mark it dirt + _multimesh_mark_all_dirty(multimesh, false, true); + } else if (multimesh->instances) { + //need to re-create AABB unfortunately, calling this has a penalty + if (multimesh->buffer_set) { + Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer); + const uint8_t *r = buffer.ptr(); + const float *data = (const float *)r; + _multimesh_re_create_aabb(multimesh, data, multimesh->instances); + } + } + + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); +} + +#define MULTIMESH_DIRTY_REGION_SIZE 512 + +void RendererStorageRD::_multimesh_make_local(MultiMesh *multimesh) const { + if (multimesh->data_cache.size() > 0) { + return; //already local + } + ERR_FAIL_COND(multimesh->data_cache.size() > 0); + // this means that the user wants to load/save individual elements, + // for this, the data must reside on CPU, so just copy it there. + multimesh->data_cache.resize(multimesh->instances * multimesh->stride_cache); + { + float *w = multimesh->data_cache.ptrw(); + + if (multimesh->buffer_set) { + Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer); + { + const uint8_t *r = buffer.ptr(); + copymem(w, r, buffer.size()); + } + } else { + zeromem(w, multimesh->instances * multimesh->stride_cache * sizeof(float)); + } + } + uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + multimesh->data_cache_dirty_regions = memnew_arr(bool, data_cache_dirty_region_count); + for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { + multimesh->data_cache_dirty_regions[i] = false; + } + multimesh->data_cache_used_dirty_regions = 0; +} + +void RendererStorageRD::_multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb) { + uint32_t region_index = p_index / MULTIMESH_DIRTY_REGION_SIZE; +#ifdef DEBUG_ENABLED + uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + ERR_FAIL_UNSIGNED_INDEX(region_index, data_cache_dirty_region_count); //bug +#endif + if (!multimesh->data_cache_dirty_regions[region_index]) { + multimesh->data_cache_dirty_regions[region_index] = true; + multimesh->data_cache_used_dirty_regions++; + } + + if (p_aabb) { + multimesh->aabb_dirty = true; + } + + if (!multimesh->dirty) { + multimesh->dirty_list = multimesh_dirty_list; + multimesh_dirty_list = multimesh; + multimesh->dirty = true; + } +} + +void RendererStorageRD::_multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, bool p_aabb) { + if (p_data) { + uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + + for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { + if (!multimesh->data_cache_dirty_regions[i]) { + multimesh->data_cache_dirty_regions[i] = true; + multimesh->data_cache_used_dirty_regions++; + } + } + } + + if (p_aabb) { + multimesh->aabb_dirty = true; + } + + if (!multimesh->dirty) { + multimesh->dirty_list = multimesh_dirty_list; + multimesh_dirty_list = multimesh; + multimesh->dirty = true; + } +} + +void RendererStorageRD::_multimesh_re_create_aabb(MultiMesh *multimesh, const float *p_data, int p_instances) { + ERR_FAIL_COND(multimesh->mesh.is_null()); + AABB aabb; + AABB mesh_aabb = mesh_get_aabb(multimesh->mesh); + for (int i = 0; i < p_instances; i++) { + const float *data = p_data + multimesh->stride_cache * i; + Transform t; + + if (multimesh->xform_format == RS::MULTIMESH_TRANSFORM_3D) { + t.basis.elements[0][0] = data[0]; + t.basis.elements[0][1] = data[1]; + t.basis.elements[0][2] = data[2]; + t.origin.x = data[3]; + t.basis.elements[1][0] = data[4]; + t.basis.elements[1][1] = data[5]; + t.basis.elements[1][2] = data[6]; + t.origin.y = data[7]; + t.basis.elements[2][0] = data[8]; + t.basis.elements[2][1] = data[9]; + t.basis.elements[2][2] = data[10]; + t.origin.z = data[11]; + + } else { + t.basis.elements[0].x = data[0]; + t.basis.elements[1].x = data[1]; + t.origin.x = data[3]; + + t.basis.elements[0].y = data[4]; + t.basis.elements[1].y = data[5]; + t.origin.y = data[7]; + } + + if (i == 0) { + aabb = t.xform(mesh_aabb); + } else { + aabb.merge_with(t.xform(mesh_aabb)); + } + } + + multimesh->aabb = aabb; +} + +void RendererStorageRD::multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform &p_transform) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + ERR_FAIL_INDEX(p_index, multimesh->instances); + ERR_FAIL_COND(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_3D); + + _multimesh_make_local(multimesh); + + { + float *w = multimesh->data_cache.ptrw(); + + float *dataptr = w + p_index * multimesh->stride_cache; + + dataptr[0] = p_transform.basis.elements[0][0]; + dataptr[1] = p_transform.basis.elements[0][1]; + dataptr[2] = p_transform.basis.elements[0][2]; + dataptr[3] = p_transform.origin.x; + dataptr[4] = p_transform.basis.elements[1][0]; + dataptr[5] = p_transform.basis.elements[1][1]; + dataptr[6] = p_transform.basis.elements[1][2]; + dataptr[7] = p_transform.origin.y; + dataptr[8] = p_transform.basis.elements[2][0]; + dataptr[9] = p_transform.basis.elements[2][1]; + dataptr[10] = p_transform.basis.elements[2][2]; + dataptr[11] = p_transform.origin.z; + } + + _multimesh_mark_dirty(multimesh, p_index, true); +} + +void RendererStorageRD::multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + ERR_FAIL_INDEX(p_index, multimesh->instances); + ERR_FAIL_COND(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_2D); + + _multimesh_make_local(multimesh); + + { + float *w = multimesh->data_cache.ptrw(); + + float *dataptr = w + p_index * multimesh->stride_cache; + + dataptr[0] = p_transform.elements[0][0]; + dataptr[1] = p_transform.elements[1][0]; + dataptr[2] = 0; + dataptr[3] = p_transform.elements[2][0]; + dataptr[4] = p_transform.elements[0][1]; + dataptr[5] = p_transform.elements[1][1]; + dataptr[6] = 0; + dataptr[7] = p_transform.elements[2][1]; + } + + _multimesh_mark_dirty(multimesh, p_index, true); +} + +void RendererStorageRD::multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + ERR_FAIL_INDEX(p_index, multimesh->instances); + ERR_FAIL_COND(!multimesh->uses_colors); + + _multimesh_make_local(multimesh); + + { + float *w = multimesh->data_cache.ptrw(); + + float *dataptr = w + p_index * multimesh->stride_cache + multimesh->color_offset_cache; + + dataptr[0] = p_color.r; + dataptr[1] = p_color.g; + dataptr[2] = p_color.b; + dataptr[3] = p_color.a; + } + + _multimesh_mark_dirty(multimesh, p_index, false); +} + +void RendererStorageRD::multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + ERR_FAIL_INDEX(p_index, multimesh->instances); + ERR_FAIL_COND(!multimesh->uses_custom_data); + + _multimesh_make_local(multimesh); + + { + float *w = multimesh->data_cache.ptrw(); + + float *dataptr = w + p_index * multimesh->stride_cache + multimesh->custom_data_offset_cache; + + dataptr[0] = p_color.r; + dataptr[1] = p_color.g; + dataptr[2] = p_color.b; + dataptr[3] = p_color.a; + } + + _multimesh_mark_dirty(multimesh, p_index, false); +} + +RID RendererStorageRD::multimesh_get_mesh(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, RID()); + + return multimesh->mesh; +} + +Transform RendererStorageRD::multimesh_instance_get_transform(RID p_multimesh, int p_index) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, Transform()); + ERR_FAIL_INDEX_V(p_index, multimesh->instances, Transform()); + ERR_FAIL_COND_V(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_3D, Transform()); + + _multimesh_make_local(multimesh); + + Transform t; + { + const float *r = multimesh->data_cache.ptr(); + + const float *dataptr = r + p_index * multimesh->stride_cache; + + t.basis.elements[0][0] = dataptr[0]; + t.basis.elements[0][1] = dataptr[1]; + t.basis.elements[0][2] = dataptr[2]; + t.origin.x = dataptr[3]; + t.basis.elements[1][0] = dataptr[4]; + t.basis.elements[1][1] = dataptr[5]; + t.basis.elements[1][2] = dataptr[6]; + t.origin.y = dataptr[7]; + t.basis.elements[2][0] = dataptr[8]; + t.basis.elements[2][1] = dataptr[9]; + t.basis.elements[2][2] = dataptr[10]; + t.origin.z = dataptr[11]; + } + + return t; +} + +Transform2D RendererStorageRD::multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, Transform2D()); + ERR_FAIL_INDEX_V(p_index, multimesh->instances, Transform2D()); + ERR_FAIL_COND_V(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_2D, Transform2D()); + + _multimesh_make_local(multimesh); + + Transform2D t; + { + const float *r = multimesh->data_cache.ptr(); + + const float *dataptr = r + p_index * multimesh->stride_cache; + + t.elements[0][0] = dataptr[0]; + t.elements[1][0] = dataptr[1]; + t.elements[2][0] = dataptr[3]; + t.elements[0][1] = dataptr[4]; + t.elements[1][1] = dataptr[5]; + t.elements[2][1] = dataptr[7]; + } + + return t; +} + +Color RendererStorageRD::multimesh_instance_get_color(RID p_multimesh, int p_index) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, Color()); + ERR_FAIL_INDEX_V(p_index, multimesh->instances, Color()); + ERR_FAIL_COND_V(!multimesh->uses_colors, Color()); + + _multimesh_make_local(multimesh); + + Color c; + { + const float *r = multimesh->data_cache.ptr(); + + const float *dataptr = r + p_index * multimesh->stride_cache + multimesh->color_offset_cache; + + c.r = dataptr[0]; + c.g = dataptr[1]; + c.b = dataptr[2]; + c.a = dataptr[3]; + } + + return c; +} + +Color RendererStorageRD::multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, Color()); + ERR_FAIL_INDEX_V(p_index, multimesh->instances, Color()); + ERR_FAIL_COND_V(!multimesh->uses_custom_data, Color()); + + _multimesh_make_local(multimesh); + + Color c; + { + const float *r = multimesh->data_cache.ptr(); + + const float *dataptr = r + p_index * multimesh->stride_cache + multimesh->custom_data_offset_cache; + + c.r = dataptr[0]; + c.g = dataptr[1]; + c.b = dataptr[2]; + c.a = dataptr[3]; + } + + return c; +} + +void RendererStorageRD::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + ERR_FAIL_COND(p_buffer.size() != (multimesh->instances * (int)multimesh->stride_cache)); + + { + const float *r = p_buffer.ptr(); + RD::get_singleton()->buffer_update(multimesh->buffer, 0, p_buffer.size() * sizeof(float), r); + multimesh->buffer_set = true; + } + + if (multimesh->data_cache.size()) { + //if we have a data cache, just update it + multimesh->data_cache = p_buffer; + { + //clear dirty since nothing will be dirty anymore + uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { + multimesh->data_cache_dirty_regions[i] = false; + } + multimesh->data_cache_used_dirty_regions = 0; + } + + _multimesh_mark_all_dirty(multimesh, false, true); //update AABB + } else if (multimesh->mesh.is_valid()) { + //if we have a mesh set, we need to re-generate the AABB from the new data + const float *data = p_buffer.ptr(); + + _multimesh_re_create_aabb(multimesh, data, multimesh->instances); + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); + } +} + +Vector<float> RendererStorageRD::multimesh_get_buffer(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, Vector<float>()); + if (multimesh->buffer.is_null()) { + return Vector<float>(); + } else if (multimesh->data_cache.size()) { + return multimesh->data_cache; + } else { + //get from memory + + Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer); + Vector<float> ret; + ret.resize(multimesh->instances * multimesh->stride_cache); + { + float *w = ret.ptrw(); + const uint8_t *r = buffer.ptr(); + copymem(w, r, buffer.size()); + } + + return ret; + } +} + +void RendererStorageRD::multimesh_set_visible_instances(RID p_multimesh, int p_visible) { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND(!multimesh); + ERR_FAIL_COND(p_visible < -1 || p_visible > multimesh->instances); + if (multimesh->visible_instances == p_visible) { + return; + } + + if (multimesh->data_cache.size()) { + //there is a data cache.. + _multimesh_mark_all_dirty(multimesh, false, true); + } + + multimesh->visible_instances = p_visible; + + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_MULTIMESH_VISIBLE_INSTANCES); +} + +int RendererStorageRD::multimesh_get_visible_instances(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, 0); + return multimesh->visible_instances; +} + +AABB RendererStorageRD::multimesh_get_aabb(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + ERR_FAIL_COND_V(!multimesh, AABB()); + if (multimesh->aabb_dirty) { + const_cast<RendererStorageRD *>(this)->_update_dirty_multimeshes(); + } + return multimesh->aabb; +} + +void RendererStorageRD::_update_dirty_multimeshes() { + while (multimesh_dirty_list) { + MultiMesh *multimesh = multimesh_dirty_list; + + if (multimesh->data_cache.size()) { //may have been cleared, so only process if it exists + const float *data = multimesh->data_cache.ptr(); + + uint32_t visible_instances = multimesh->visible_instances >= 0 ? multimesh->visible_instances : multimesh->instances; + + if (multimesh->data_cache_used_dirty_regions) { + uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + uint32_t visible_region_count = (visible_instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + + uint32_t region_size = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * sizeof(float); + + if (multimesh->data_cache_used_dirty_regions > 32 || multimesh->data_cache_used_dirty_regions > visible_region_count / 2) { + //if there too many dirty regions, or represent the majority of regions, just copy all, else transfer cost piles up too much + RD::get_singleton()->buffer_update(multimesh->buffer, 0, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data); + } else { + //not that many regions? update them all + for (uint32_t i = 0; i < visible_region_count; i++) { + if (multimesh->data_cache_dirty_regions[i]) { + uint64_t offset = i * region_size; + uint64_t size = multimesh->stride_cache * multimesh->instances * sizeof(float); + RD::get_singleton()->buffer_update(multimesh->buffer, offset, MIN(region_size, size - offset), &data[i * region_size]); + } + } + } + + for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { + multimesh->data_cache_dirty_regions[i] = false; + } + + multimesh->data_cache_used_dirty_regions = 0; + } + + if (multimesh->aabb_dirty) { + //aabb is dirty.. + _multimesh_re_create_aabb(multimesh, data, visible_instances); + multimesh->aabb_dirty = false; + multimesh->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); + } + } + + multimesh_dirty_list = multimesh->dirty_list; + + multimesh->dirty_list = nullptr; + multimesh->dirty = false; + } + + multimesh_dirty_list = nullptr; +} + +/* PARTICLES */ + +RID RendererStorageRD::particles_allocate() { + return particles_owner.allocate_rid(); +} +void RendererStorageRD::particles_initialize(RID p_rid) { + particles_owner.initialize_rid(p_rid, Particles()); +} + +void RendererStorageRD::particles_set_emitting(RID p_particles, bool p_emitting) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->emitting = p_emitting; +} + +bool RendererStorageRD::particles_get_emitting(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, false); + + return particles->emitting; +} + +void RendererStorageRD::_particles_free_data(Particles *particles) { + if (!particles->particle_buffer.is_valid()) { + return; + } + RD::get_singleton()->free(particles->particle_buffer); + RD::get_singleton()->free(particles->frame_params_buffer); + RD::get_singleton()->free(particles->particle_instance_buffer); + particles->particles_transforms_buffer_uniform_set = RID(); + particles->particle_buffer = RID(); + + if (RD::get_singleton()->uniform_set_is_valid(particles->collision_textures_uniform_set)) { + RD::get_singleton()->free(particles->collision_textures_uniform_set); + } + + if (particles->particles_sort_buffer.is_valid()) { + RD::get_singleton()->free(particles->particles_sort_buffer); + particles->particles_sort_buffer = RID(); + } + + if (particles->emission_buffer != nullptr) { + particles->emission_buffer = nullptr; + particles->emission_buffer_data.clear(); + RD::get_singleton()->free(particles->emission_storage_buffer); + particles->emission_storage_buffer = RID(); + } +} + +void RendererStorageRD::particles_set_amount(RID p_particles, int p_amount) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + if (particles->amount == p_amount) { + return; + } + + _particles_free_data(particles); + + particles->amount = p_amount; + + if (particles->amount > 0) { + particles->particle_buffer = RD::get_singleton()->storage_buffer_create(sizeof(ParticleData) * p_amount); + particles->frame_params_buffer = RD::get_singleton()->storage_buffer_create(sizeof(ParticlesFrameParams) * 1); + particles->particle_instance_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * 4 * (3 + 1 + 1) * p_amount); + //needs to clear it + + { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(particles->particle_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(particles->particle_instance_buffer); + uniforms.push_back(u); + } + + particles->particles_copy_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, 0), 0); + } + } + + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; +} + +void RendererStorageRD::particles_set_lifetime(RID p_particles, float p_lifetime) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->lifetime = p_lifetime; +} + +void RendererStorageRD::particles_set_one_shot(RID p_particles, bool p_one_shot) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->one_shot = p_one_shot; +} + +void RendererStorageRD::particles_set_pre_process_time(RID p_particles, float p_time) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->pre_process_time = p_time; +} +void RendererStorageRD::particles_set_explosiveness_ratio(RID p_particles, float p_ratio) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->explosiveness = p_ratio; +} +void RendererStorageRD::particles_set_randomness_ratio(RID p_particles, float p_ratio) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->randomness = p_ratio; +} + +void RendererStorageRD::particles_set_custom_aabb(RID p_particles, const AABB &p_aabb) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->custom_aabb = p_aabb; + particles->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +void RendererStorageRD::particles_set_speed_scale(RID p_particles, float p_scale) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->speed_scale = p_scale; +} +void RendererStorageRD::particles_set_use_local_coordinates(RID p_particles, bool p_enable) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->use_local_coords = p_enable; +} + +void RendererStorageRD::particles_set_fixed_fps(RID p_particles, int p_fps) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->fixed_fps = p_fps; +} + +void RendererStorageRD::particles_set_fractional_delta(RID p_particles, bool p_enable) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->fractional_delta = p_enable; +} + +void RendererStorageRD::particles_set_collision_base_size(RID p_particles, float p_size) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->collision_base_size = p_size; +} + +void RendererStorageRD::particles_set_process_material(RID p_particles, RID p_material) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->process_material = p_material; +} + +void RendererStorageRD::particles_set_draw_order(RID p_particles, RS::ParticlesDrawOrder p_order) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->draw_order = p_order; +} + +void RendererStorageRD::particles_set_draw_passes(RID p_particles, int p_passes) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->draw_passes.resize(p_passes); +} + +void RendererStorageRD::particles_set_draw_pass_mesh(RID p_particles, int p_pass, RID p_mesh) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + ERR_FAIL_INDEX(p_pass, particles->draw_passes.size()); + particles->draw_passes.write[p_pass] = p_mesh; +} + +void RendererStorageRD::particles_restart(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->restart_request = true; +} + +void RendererStorageRD::_particles_allocate_emission_buffer(Particles *particles) { + ERR_FAIL_COND(particles->emission_buffer != nullptr); + + particles->emission_buffer_data.resize(sizeof(ParticleEmissionBuffer::Data) * particles->amount + sizeof(uint32_t) * 4); + zeromem(particles->emission_buffer_data.ptrw(), particles->emission_buffer_data.size()); + particles->emission_buffer = (ParticleEmissionBuffer *)particles->emission_buffer_data.ptrw(); + particles->emission_buffer->particle_max = particles->amount; + + particles->emission_storage_buffer = RD::get_singleton()->storage_buffer_create(particles->emission_buffer_data.size(), particles->emission_buffer_data); + + if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) { + //will need to be re-created + RD::get_singleton()->free(particles->particles_material_uniform_set); + particles->particles_material_uniform_set = RID(); + } +} + +void RendererStorageRD::particles_set_subemitter(RID p_particles, RID p_subemitter_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + ERR_FAIL_COND(p_particles == p_subemitter_particles); + + particles->sub_emitter = p_subemitter_particles; + + if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) { + RD::get_singleton()->free(particles->particles_material_uniform_set); + particles->particles_material_uniform_set = RID(); //clear and force to re create sub emitting + } +} + +void RendererStorageRD::particles_emit(RID p_particles, const Transform &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + ERR_FAIL_COND(particles->amount == 0); + + if (particles->emitting) { + particles->clear = true; + particles->emitting = false; + } + + if (particles->emission_buffer == nullptr) { + _particles_allocate_emission_buffer(particles); + } + + if (particles->inactive) { + //in case it was inactive, make active again + particles->inactive = false; + particles->inactive_time = 0; + } + + int32_t idx = particles->emission_buffer->particle_count; + if (idx < particles->emission_buffer->particle_max) { + store_transform(p_transform, particles->emission_buffer->data[idx].xform); + + particles->emission_buffer->data[idx].velocity[0] = p_velocity.x; + particles->emission_buffer->data[idx].velocity[1] = p_velocity.y; + particles->emission_buffer->data[idx].velocity[2] = p_velocity.z; + + particles->emission_buffer->data[idx].custom[0] = p_custom.r; + particles->emission_buffer->data[idx].custom[1] = p_custom.g; + particles->emission_buffer->data[idx].custom[2] = p_custom.b; + particles->emission_buffer->data[idx].custom[3] = p_custom.a; + + particles->emission_buffer->data[idx].color[0] = p_color.r; + particles->emission_buffer->data[idx].color[1] = p_color.g; + particles->emission_buffer->data[idx].color[2] = p_color.b; + particles->emission_buffer->data[idx].color[3] = p_color.a; + + particles->emission_buffer->data[idx].flags = p_emit_flags; + particles->emission_buffer->particle_count++; + } +} + +void RendererStorageRD::particles_request_process(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + if (!particles->dirty) { + particles->dirty = true; + particles->update_list = particle_update_list; + particle_update_list = particles; + } +} + +AABB RendererStorageRD::particles_get_current_aabb(RID p_particles) { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, AABB()); + + Vector<ParticleData> data; + data.resize(particles->amount); + + Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(particles->particle_buffer); + + Transform inv = particles->emission_transform.affine_inverse(); + + AABB aabb; + if (buffer.size()) { + bool first = true; + const ParticleData *particle_data = (const ParticleData *)data.ptr(); + for (int i = 0; i < particles->amount; i++) { + if (particle_data[i].active) { + Vector3 pos = Vector3(particle_data[i].xform[12], particle_data[i].xform[13], particle_data[i].xform[14]); + if (!particles->use_local_coords) { + pos = inv.xform(pos); + } + if (first) { + aabb.position = pos; + first = false; + } else { + aabb.expand_to(pos); + } + } + } + } + + float longest_axis_size = 0; + for (int i = 0; i < particles->draw_passes.size(); i++) { + if (particles->draw_passes[i].is_valid()) { + AABB maabb = mesh_get_aabb(particles->draw_passes[i], RID()); + longest_axis_size = MAX(maabb.get_longest_axis_size(), longest_axis_size); + } + } + + aabb.grow_by(longest_axis_size); + + return aabb; +} + +AABB RendererStorageRD::particles_get_aabb(RID p_particles) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, AABB()); + + return particles->custom_aabb; +} + +void RendererStorageRD::particles_set_emission_transform(RID p_particles, const Transform &p_transform) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->emission_transform = p_transform; +} + +int RendererStorageRD::particles_get_draw_passes(RID p_particles) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, 0); + + return particles->draw_passes.size(); +} + +RID RendererStorageRD::particles_get_draw_pass_mesh(RID p_particles, int p_pass) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, RID()); + ERR_FAIL_INDEX_V(p_pass, particles->draw_passes.size(), RID()); + + return particles->draw_passes[p_pass]; +} + +void RendererStorageRD::particles_add_collision(RID p_particles, RID p_particles_collision_instance) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->collisions.insert(p_particles_collision_instance); +} + +void RendererStorageRD::particles_remove_collision(RID p_particles, RID p_particles_collision_instance) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->collisions.erase(p_particles_collision_instance); +} + +void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta) { + if (p_particles->particles_material_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(p_particles->particles_material_uniform_set)) { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(p_particles->frame_params_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(p_particles->particle_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + if (p_particles->emission_storage_buffer.is_valid()) { + u.ids.push_back(p_particles->emission_storage_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + Particles *sub_emitter = particles_owner.getornull(p_particles->sub_emitter); + if (sub_emitter) { + if (sub_emitter->emission_buffer == nullptr) { //no emission buffer, allocate emission buffer + _particles_allocate_emission_buffer(sub_emitter); + } + u.ids.push_back(sub_emitter->emission_storage_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + + p_particles->particles_material_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.default_shader_rd, 1); + } + + float new_phase = Math::fmod((float)p_particles->phase + (p_delta / p_particles->lifetime) * p_particles->speed_scale, (float)1.0); + + ParticlesFrameParams &frame_params = p_particles->frame_params; + + if (p_particles->clear) { + p_particles->cycle_number = 0; + p_particles->random_seed = Math::rand(); + } else if (new_phase < p_particles->phase) { + if (p_particles->one_shot) { + p_particles->emitting = false; + } + p_particles->cycle_number++; + } + + frame_params.emitting = p_particles->emitting; + frame_params.system_phase = new_phase; + frame_params.prev_system_phase = p_particles->phase; + + p_particles->phase = new_phase; + + frame_params.time = RendererCompositorRD::singleton->get_total_time(); + frame_params.delta = p_delta * p_particles->speed_scale; + frame_params.random_seed = p_particles->random_seed; + frame_params.explosiveness = p_particles->explosiveness; + frame_params.randomness = p_particles->randomness; + + if (p_particles->use_local_coords) { + store_transform(Transform(), frame_params.emission_transform); + } else { + store_transform(p_particles->emission_transform, frame_params.emission_transform); + } + + frame_params.cycle = p_particles->cycle_number; + + { //collision and attractors + + frame_params.collider_count = 0; + frame_params.attractor_count = 0; + frame_params.particle_size = p_particles->collision_base_size; + + RID collision_3d_textures[ParticlesFrameParams::MAX_3D_TEXTURES]; + RID collision_heightmap_texture; + + Transform to_particles; + if (p_particles->use_local_coords) { + to_particles = p_particles->emission_transform.affine_inverse(); + } + uint32_t collision_3d_textures_used = 0; + for (const Set<RID>::Element *E = p_particles->collisions.front(); E; E = E->next()) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.getornull(E->get()); + if (!pci || !pci->active) { + continue; + } + ParticlesCollision *pc = particles_collision_owner.getornull(pci->collision); + ERR_CONTINUE(!pc); + + Transform to_collider = pci->transform; + if (p_particles->use_local_coords) { + to_collider = to_particles * to_collider; + } + Vector3 scale = to_collider.basis.get_scale(); + to_collider.basis.orthonormalize(); + + if (pc->type <= RS::PARTICLES_COLLISION_TYPE_VECTOR_FIELD_ATTRACT) { + //attractor + if (frame_params.attractor_count >= ParticlesFrameParams::MAX_ATTRACTORS) { + continue; + } + + ParticlesFrameParams::Attractor &attr = frame_params.attractors[frame_params.attractor_count]; + + store_transform(to_collider, attr.transform); + attr.strength = pc->attractor_strength; + attr.attenuation = pc->attractor_attenuation; + attr.directionality = pc->attractor_directionality; + + switch (pc->type) { + case RS::PARTICLES_COLLISION_TYPE_SPHERE_ATTRACT: { + attr.type = ParticlesFrameParams::ATTRACTOR_TYPE_SPHERE; + float radius = pc->radius; + radius *= (scale.x + scale.y + scale.z) / 3.0; + attr.extents[0] = radius; + attr.extents[1] = radius; + attr.extents[2] = radius; + } break; + case RS::PARTICLES_COLLISION_TYPE_BOX_ATTRACT: { + attr.type = ParticlesFrameParams::ATTRACTOR_TYPE_BOX; + Vector3 extents = pc->extents * scale; + attr.extents[0] = extents.x; + attr.extents[1] = extents.y; + attr.extents[2] = extents.z; + } break; + case RS::PARTICLES_COLLISION_TYPE_VECTOR_FIELD_ATTRACT: { + if (collision_3d_textures_used >= ParticlesFrameParams::MAX_3D_TEXTURES) { + continue; + } + attr.type = ParticlesFrameParams::ATTRACTOR_TYPE_VECTOR_FIELD; + Vector3 extents = pc->extents * scale; + attr.extents[0] = extents.x; + attr.extents[1] = extents.y; + attr.extents[2] = extents.z; + attr.texture_index = collision_3d_textures_used; + + collision_3d_textures[collision_3d_textures_used] = pc->field_texture; + collision_3d_textures_used++; + } break; + default: { + } + } + + frame_params.attractor_count++; + } else { + //collider + if (frame_params.collider_count >= ParticlesFrameParams::MAX_COLLIDERS) { + continue; + } + + ParticlesFrameParams::Collider &col = frame_params.colliders[frame_params.collider_count]; + + store_transform(to_collider, col.transform); + switch (pc->type) { + case RS::PARTICLES_COLLISION_TYPE_SPHERE_COLLIDE: { + col.type = ParticlesFrameParams::COLLISION_TYPE_SPHERE; + float radius = pc->radius; + radius *= (scale.x + scale.y + scale.z) / 3.0; + col.extents[0] = radius; + col.extents[1] = radius; + col.extents[2] = radius; + } break; + case RS::PARTICLES_COLLISION_TYPE_BOX_COLLIDE: { + col.type = ParticlesFrameParams::COLLISION_TYPE_BOX; + Vector3 extents = pc->extents * scale; + col.extents[0] = extents.x; + col.extents[1] = extents.y; + col.extents[2] = extents.z; + } break; + case RS::PARTICLES_COLLISION_TYPE_SDF_COLLIDE: { + if (collision_3d_textures_used >= ParticlesFrameParams::MAX_3D_TEXTURES) { + continue; + } + col.type = ParticlesFrameParams::COLLISION_TYPE_SDF; + Vector3 extents = pc->extents * scale; + col.extents[0] = extents.x; + col.extents[1] = extents.y; + col.extents[2] = extents.z; + col.texture_index = collision_3d_textures_used; + col.scale = (scale.x + scale.y + scale.z) * 0.333333333333; //non uniform scale non supported + + collision_3d_textures[collision_3d_textures_used] = pc->field_texture; + collision_3d_textures_used++; + } break; + case RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE: { + if (collision_heightmap_texture != RID()) { //already taken + continue; + } + + col.type = ParticlesFrameParams::COLLISION_TYPE_HEIGHT_FIELD; + Vector3 extents = pc->extents * scale; + col.extents[0] = extents.x; + col.extents[1] = extents.y; + col.extents[2] = extents.z; + collision_heightmap_texture = pc->heightfield_texture; + } break; + default: { + } + } + + frame_params.collider_count++; + } + } + + bool different = false; + if (collision_3d_textures_used == p_particles->collision_3d_textures_used) { + for (int i = 0; i < ParticlesFrameParams::MAX_3D_TEXTURES; i++) { + if (p_particles->collision_3d_textures[i] != collision_3d_textures[i]) { + different = true; + break; + } + } + } + + if (collision_heightmap_texture != p_particles->collision_heightmap_texture) { + different = true; + } + + bool uniform_set_valid = RD::get_singleton()->uniform_set_is_valid(p_particles->collision_textures_uniform_set); + + if (different || !uniform_set_valid) { + if (uniform_set_valid) { + RD::get_singleton()->free(p_particles->collision_textures_uniform_set); + } + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + for (uint32_t i = 0; i < ParticlesFrameParams::MAX_3D_TEXTURES; i++) { + RID rd_tex; + if (i < collision_3d_textures_used) { + Texture *t = texture_owner.getornull(collision_3d_textures[i]); + if (t && t->type == Texture::TYPE_3D) { + rd_tex = t->rd_texture; + } + } + + if (rd_tex == RID()) { + rd_tex = default_rd_textures[DEFAULT_RD_TEXTURE_3D_WHITE]; + } + u.ids.push_back(rd_tex); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1; + if (collision_heightmap_texture.is_valid()) { + u.ids.push_back(collision_heightmap_texture); + } else { + u.ids.push_back(default_rd_textures[DEFAULT_RD_TEXTURE_BLACK]); + } + uniforms.push_back(u); + } + p_particles->collision_textures_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.default_shader_rd, 2); + } + } + + ParticlesShader::PushConstant push_constant; + + push_constant.clear = p_particles->clear; + push_constant.total_particles = p_particles->amount; + push_constant.lifetime = p_particles->lifetime; + push_constant.trail_size = 1; + push_constant.use_fractional_delta = p_particles->fractional_delta; + push_constant.sub_emitter_mode = !p_particles->emitting && p_particles->emission_buffer && (p_particles->emission_buffer->particle_count > 0 || p_particles->force_sub_emit); + + p_particles->force_sub_emit = false; //reset + + Particles *sub_emitter = particles_owner.getornull(p_particles->sub_emitter); + + if (sub_emitter && sub_emitter->emission_storage_buffer.is_valid()) { + // print_line("updating subemitter buffer"); + int32_t zero[4] = { 0, sub_emitter->amount, 0, 0 }; + RD::get_singleton()->buffer_update(sub_emitter->emission_storage_buffer, 0, sizeof(uint32_t) * 4, zero); + push_constant.can_emit = true; + + if (sub_emitter->emitting) { + sub_emitter->emitting = false; + sub_emitter->clear = true; //will need to clear if it was emitting, sorry + } + //make sure the sub emitter processes particles too + sub_emitter->inactive = false; + sub_emitter->inactive_time = 0; + + sub_emitter->force_sub_emit = true; + + } else { + push_constant.can_emit = false; + } + + if (p_particles->emission_buffer && p_particles->emission_buffer->particle_count) { + RD::get_singleton()->buffer_update(p_particles->emission_storage_buffer, 0, sizeof(uint32_t) * 4 + sizeof(ParticleEmissionBuffer::Data) * p_particles->emission_buffer->particle_count, p_particles->emission_buffer); + p_particles->emission_buffer->particle_count = 0; + } + + p_particles->clear = false; + + RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams), &frame_params); + + ParticlesMaterialData *m = (ParticlesMaterialData *)material_get_data(p_particles->process_material, SHADER_TYPE_PARTICLES); + if (!m) { + m = (ParticlesMaterialData *)material_get_data(particles_shader.default_material, SHADER_TYPE_PARTICLES); + } + + ERR_FAIL_COND(!m); + + //todo should maybe compute all particle systems together? + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, m->shader_data->pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles_shader.base_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, p_particles->particles_material_uniform_set, 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, p_particles->collision_textures_uniform_set, 2); + + if (m->uniform_set.is_valid()) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, m->uniform_set, 3); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1); + + RD::get_singleton()->compute_list_end(); +} + +void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 &p_axis) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH) { + return; //uninteresting for other modes + } + + //copy to sort buffer + if (particles->particles_sort_buffer == RID()) { + uint32_t size = particles->amount; + if (size & 1) { + size++; //make multiple of 16 + } + size *= sizeof(float) * 2; + particles->particles_sort_buffer = RD::get_singleton()->storage_buffer_create(size); + { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(particles->particles_sort_buffer); + uniforms.push_back(u); + } + + particles->particles_sort_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, ParticlesShader::COPY_MODE_FILL_SORT_BUFFER), 1); + } + } + + Vector3 axis = -p_axis; // cameras look to z negative + + if (particles->use_local_coords) { + axis = particles->emission_transform.basis.xform_inv(axis).normalized(); + } + + ParticlesShader::CopyPushConstant copy_push_constant; + copy_push_constant.total_particles = particles->amount; + copy_push_constant.sort_direction[0] = axis.x; + copy_push_constant.sort_direction[1] = axis.y; + copy_push_constant.sort_direction[2] = axis.z; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_SORT_BUFFER]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); + + RD::get_singleton()->compute_list_end(); + + effects.sort_buffer(particles->particles_sort_uniform_set, particles->amount); + + compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_INSTANCES_WITH_SORT_BUFFER]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); + + RD::get_singleton()->compute_list_end(); +} + +void RendererStorageRD::update_particles() { + while (particle_update_list) { + //use transform feedback to process particles + + Particles *particles = particle_update_list; + + //take and remove + particle_update_list = particles->update_list; + particles->update_list = nullptr; + particles->dirty = false; + + if (particles->restart_request) { + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + particles->restart_request = false; + } + + if (particles->inactive && !particles->emitting) { + //go next + continue; + } + + if (particles->emitting) { + if (particles->inactive) { + //restart system from scratch + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + } + particles->inactive = false; + particles->inactive_time = 0; + } else { + particles->inactive_time += particles->speed_scale * RendererCompositorRD::singleton->get_frame_delta_time(); + if (particles->inactive_time > particles->lifetime * 1.2) { + particles->inactive = true; + continue; + } + } + + bool zero_time_scale = Engine::get_singleton()->get_time_scale() <= 0.0; + + if (particles->clear && particles->pre_process_time > 0.0) { + float frame_time; + if (particles->fixed_fps > 0) + frame_time = 1.0 / particles->fixed_fps; + else + frame_time = 1.0 / 30.0; + + float todo = particles->pre_process_time; + + while (todo >= 0) { + _particles_process(particles, frame_time); + todo -= frame_time; + } + } + + if (particles->fixed_fps > 0) { + float frame_time; + float decr; + if (zero_time_scale) { + frame_time = 0.0; + decr = 1.0 / particles->fixed_fps; + } else { + frame_time = 1.0 / particles->fixed_fps; + decr = frame_time; + } + float delta = RendererCompositorRD::singleton->get_frame_delta_time(); + if (delta > 0.1) { //avoid recursive stalls if fps goes below 10 + delta = 0.1; + } else if (delta <= 0.0) { //unlikely but.. + delta = 0.001; + } + float todo = particles->frame_remainder + delta; + + while (todo >= frame_time) { + _particles_process(particles, frame_time); + todo -= decr; + } + + particles->frame_remainder = todo; + + } else { + if (zero_time_scale) + _particles_process(particles, 0.0); + else + _particles_process(particles, RendererCompositorRD::singleton->get_frame_delta_time()); + } + + //copy particles to instance buffer + + if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH) { + ParticlesShader::CopyPushConstant copy_push_constant; + copy_push_constant.total_particles = particles->amount; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_INSTANCES]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); + + RD::get_singleton()->compute_list_end(); + } + + particles->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); + } +} + +bool RendererStorageRD::particles_is_inactive(RID p_particles) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, false); + return !particles->emitting && particles->inactive; +} + +/* SKY SHADER */ + +void RendererStorageRD::ParticlesShaderData::set_code(const String &p_code) { + //compile + + code = p_code; + valid = false; + ubo_size = 0; + uniforms.clear(); + + if (code == String()) { + return; //just invalid, but no error + } + + ShaderCompilerRD::GeneratedCode gen_code; + ShaderCompilerRD::IdentifierActions actions; + + /* + uses_time = false; + + actions.render_mode_flags["use_half_res_pass"] = &uses_half_res; + actions.render_mode_flags["use_quarter_res_pass"] = &uses_quarter_res; + + actions.usage_flag_pointers["TIME"] = &uses_time; +*/ + + actions.uniforms = &uniforms; + + Error err = base_singleton->particles_shader.compiler.compile(RS::SHADER_PARTICLES, code, &actions, path, gen_code); + + ERR_FAIL_COND(err != OK); + + if (version.is_null()) { + version = base_singleton->particles_shader.shader.version_create(); + } + + base_singleton->particles_shader.shader.version_set_compute_code(version, gen_code.uniforms, gen_code.compute_global, gen_code.compute, gen_code.defines); + ERR_FAIL_COND(!base_singleton->particles_shader.shader.version_is_valid(version)); + + ubo_size = gen_code.uniform_total_size; + ubo_offsets = gen_code.uniform_offsets; + texture_uniforms = gen_code.texture_uniforms; + + //update pipelines + + pipeline = RD::get_singleton()->compute_pipeline_create(base_singleton->particles_shader.shader.version_get_shader(version, 0)); + + valid = true; +} + +void RendererStorageRD::ParticlesShaderData::set_default_texture_param(const StringName &p_name, RID p_texture) { + if (!p_texture.is_valid()) { + default_texture_params.erase(p_name); + } else { + default_texture_params[p_name] = p_texture; + } +} + +void RendererStorageRD::ParticlesShaderData::get_param_list(List<PropertyInfo> *p_param_list) const { + Map<int, StringName> order; + + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL || E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + if (E->get().texture_order >= 0) { + order[E->get().texture_order + 100000] = E->key(); + } else { + order[E->get().order] = E->key(); + } + } + + for (Map<int, StringName>::Element *E = order.front(); E; E = E->next()) { + PropertyInfo pi = ShaderLanguage::uniform_to_property_info(uniforms[E->get()]); + pi.name = E->get(); + p_param_list->push_back(pi); + } +} + +void RendererStorageRD::ParticlesShaderData::get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const { + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + RendererStorage::InstanceShaderParam p; + p.info = ShaderLanguage::uniform_to_property_info(E->get()); + p.info.name = E->key(); //supply name + p.index = E->get().instance_index; + p.default_value = ShaderLanguage::constant_value_to_variant(E->get().default_value, E->get().type, E->get().hint); + p_param_list->push_back(p); + } +} + +bool RendererStorageRD::ParticlesShaderData::is_param_texture(const StringName &p_param) const { + if (!uniforms.has(p_param)) { + return false; + } + + return uniforms[p_param].texture_order >= 0; +} + +bool RendererStorageRD::ParticlesShaderData::is_animated() const { + return false; +} + +bool RendererStorageRD::ParticlesShaderData::casts_shadows() const { + return false; +} + +Variant RendererStorageRD::ParticlesShaderData::get_default_parameter(const StringName &p_parameter) const { + if (uniforms.has(p_parameter)) { + ShaderLanguage::ShaderNode::Uniform uniform = uniforms[p_parameter]; + Vector<ShaderLanguage::ConstantNode::Value> default_value = uniform.default_value; + return ShaderLanguage::constant_value_to_variant(default_value, uniform.type, uniform.hint); + } + return Variant(); +} + +RS::ShaderNativeSourceCode RendererStorageRD::ParticlesShaderData::get_native_source_code() const { + return base_singleton->particles_shader.shader.version_get_native_source_code(version); +} + +RendererStorageRD::ParticlesShaderData::ParticlesShaderData() { + valid = false; +} + +RendererStorageRD::ParticlesShaderData::~ParticlesShaderData() { + //pipeline variants will clear themselves if shader is gone + if (version.is_valid()) { + base_singleton->particles_shader.shader.version_free(version); + } +} + +RendererStorageRD::ShaderData *RendererStorageRD::_create_particles_shader_func() { + ParticlesShaderData *shader_data = memnew(ParticlesShaderData); + return shader_data; +} + +void RendererStorageRD::ParticlesMaterialData::update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { + uniform_set_updated = true; + + if ((uint32_t)ubo_data.size() != shader_data->ubo_size) { + p_uniform_dirty = true; + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + uniform_buffer = RID(); + } + + ubo_data.resize(shader_data->ubo_size); + if (ubo_data.size()) { + uniform_buffer = RD::get_singleton()->uniform_buffer_create(ubo_data.size()); + memset(ubo_data.ptrw(), 0, ubo_data.size()); //clear + } + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + //check whether buffer changed + if (p_uniform_dirty && ubo_data.size()) { + update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); + RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); + } + + uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); + + if ((uint32_t)texture_cache.size() != tex_uniform_count) { + texture_cache.resize(tex_uniform_count); + p_textures_dirty = true; + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + if (p_textures_dirty && tex_uniform_count) { + update_textures(p_parameters, shader_data->default_texture_params, shader_data->texture_uniforms, texture_cache.ptrw(), true); + } + + if (shader_data->ubo_size == 0 && shader_data->texture_uniforms.size() == 0) { + // This material does not require an uniform set, so don't create it. + return; + } + + if (!p_textures_dirty && uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + //no reason to update uniform set, only UBO (or nothing) was needed to update + return; + } + + Vector<RD::Uniform> uniforms; + + { + if (shader_data->ubo_size) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 0; + u.ids.push_back(uniform_buffer); + uniforms.push_back(u); + } + + const RID *textures = texture_cache.ptrw(); + for (uint32_t i = 0; i < tex_uniform_count; i++) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1 + i; + u.ids.push_back(textures[i]); + uniforms.push_back(u); + } + } + + uniform_set = RD::get_singleton()->uniform_set_create(uniforms, base_singleton->particles_shader.shader.version_get_shader(shader_data->version, 0), 3); +} + +RendererStorageRD::ParticlesMaterialData::~ParticlesMaterialData() { + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + } + + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + } +} + +RendererStorageRD::MaterialData *RendererStorageRD::_create_particles_material_func(ParticlesShaderData *p_shader) { + ParticlesMaterialData *material_data = memnew(ParticlesMaterialData); + material_data->shader_data = p_shader; + material_data->last_frame = false; + //update will happen later anyway so do nothing. + return material_data; +} +//////// + +/* PARTICLES COLLISION API */ + +RID RendererStorageRD::particles_collision_allocate() { + return particles_collision_owner.allocate_rid(); +} +void RendererStorageRD::particles_collision_initialize(RID p_rid) { + particles_collision_owner.initialize_rid(p_rid, ParticlesCollision()); +} + +RID RendererStorageRD::particles_collision_get_heightfield_framebuffer(RID p_particles_collision) const { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, RID()); + ERR_FAIL_COND_V(particles_collision->type != RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE, RID()); + + if (particles_collision->heightfield_texture == RID()) { + //create + int resolutions[RS::PARTICLES_COLLISION_HEIGHTFIELD_RESOLUTION_MAX] = { 256, 512, 1024, 2048, 4096, 8192 }; + Size2i size; + if (particles_collision->extents.x > particles_collision->extents.z) { + size.x = resolutions[particles_collision->heightfield_resolution]; + size.y = int32_t(particles_collision->extents.z / particles_collision->extents.x * size.x); + } else { + size.y = resolutions[particles_collision->heightfield_resolution]; + size.x = int32_t(particles_collision->extents.x / particles_collision->extents.z * size.y); + } + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_D32_SFLOAT; + tf.width = size.x; + tf.height = size.y; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + particles_collision->heightfield_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + Vector<RID> fb_tex; + fb_tex.push_back(particles_collision->heightfield_texture); + particles_collision->heightfield_fb = RD::get_singleton()->framebuffer_create(fb_tex); + particles_collision->heightfield_fb_size = size; + } + + return particles_collision->heightfield_fb; +} + +void RendererStorageRD::particles_collision_set_collision_type(RID p_particles_collision, RS::ParticlesCollisionType p_type) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + if (p_type == particles_collision->type) { + return; + } + + if (particles_collision->heightfield_texture.is_valid()) { + RD::get_singleton()->free(particles_collision->heightfield_texture); + particles_collision->heightfield_texture = RID(); + } + particles_collision->type = p_type; + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +void RendererStorageRD::particles_collision_set_cull_mask(RID p_particles_collision, uint32_t p_cull_mask) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + particles_collision->cull_mask = p_cull_mask; +} + +void RendererStorageRD::particles_collision_set_sphere_radius(RID p_particles_collision, float p_radius) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->radius = p_radius; + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +void RendererStorageRD::particles_collision_set_box_extents(RID p_particles_collision, const Vector3 &p_extents) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->extents = p_extents; + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +void RendererStorageRD::particles_collision_set_attractor_strength(RID p_particles_collision, float p_strength) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->attractor_strength = p_strength; +} + +void RendererStorageRD::particles_collision_set_attractor_directionality(RID p_particles_collision, float p_directionality) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->attractor_directionality = p_directionality; +} + +void RendererStorageRD::particles_collision_set_attractor_attenuation(RID p_particles_collision, float p_curve) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->attractor_attenuation = p_curve; +} + +void RendererStorageRD::particles_collision_set_field_texture(RID p_particles_collision, RID p_texture) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->field_texture = p_texture; +} + +void RendererStorageRD::particles_collision_height_field_update(RID p_particles_collision) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + particles_collision->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +void RendererStorageRD::particles_collision_set_height_field_resolution(RID p_particles_collision, RS::ParticlesCollisionHeightfieldResolution p_resolution) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + if (particles_collision->heightfield_resolution == p_resolution) { + return; + } + + particles_collision->heightfield_resolution = p_resolution; + + if (particles_collision->heightfield_texture.is_valid()) { + RD::get_singleton()->free(particles_collision->heightfield_texture); + particles_collision->heightfield_texture = RID(); + } +} + +AABB RendererStorageRD::particles_collision_get_aabb(RID p_particles_collision) const { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, AABB()); + + switch (particles_collision->type) { + case RS::PARTICLES_COLLISION_TYPE_SPHERE_ATTRACT: + case RS::PARTICLES_COLLISION_TYPE_SPHERE_COLLIDE: { + AABB aabb; + aabb.position = -Vector3(1, 1, 1) * particles_collision->radius; + aabb.size = Vector3(2, 2, 2) * particles_collision->radius; + return aabb; + } + default: { + AABB aabb; + aabb.position = -particles_collision->extents; + aabb.size = particles_collision->extents * 2; + return aabb; + } + } + + return AABB(); +} + +Vector3 RendererStorageRD::particles_collision_get_extents(RID p_particles_collision) const { + const ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, Vector3()); + return particles_collision->extents; +} + +bool RendererStorageRD::particles_collision_is_heightfield(RID p_particles_collision) const { + const ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, false); + return particles_collision->type == RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE; +} + +RID RendererStorageRD::particles_collision_instance_create(RID p_collision) { + ParticlesCollisionInstance pci; + pci.collision = p_collision; + return particles_collision_instance_owner.make_rid(pci); +} +void RendererStorageRD::particles_collision_instance_set_transform(RID p_collision_instance, const Transform &p_transform) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.getornull(p_collision_instance); + ERR_FAIL_COND(!pci); + pci->transform = p_transform; +} +void RendererStorageRD::particles_collision_instance_set_active(RID p_collision_instance, bool p_active) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.getornull(p_collision_instance); + ERR_FAIL_COND(!pci); + pci->active = p_active; +} + +/* SKELETON API */ + +RID RendererStorageRD::skeleton_allocate() { + return skeleton_owner.allocate_rid(); +} +void RendererStorageRD::skeleton_initialize(RID p_rid) { + skeleton_owner.initialize_rid(p_rid, Skeleton()); +} + +void RendererStorageRD::_skeleton_make_dirty(Skeleton *skeleton) { + if (!skeleton->dirty) { + skeleton->dirty = true; + skeleton->dirty_list = skeleton_dirty_list; + skeleton_dirty_list = skeleton; + } +} + +void RendererStorageRD::skeleton_allocate_data(RID p_skeleton, int p_bones, bool p_2d_skeleton) { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + ERR_FAIL_COND(!skeleton); + ERR_FAIL_COND(p_bones < 0); + + if (skeleton->size == p_bones && skeleton->use_2d == p_2d_skeleton) { + return; + } + + skeleton->size = p_bones; + skeleton->use_2d = p_2d_skeleton; + skeleton->uniform_set_3d = RID(); + + if (skeleton->buffer.is_valid()) { + RD::get_singleton()->free(skeleton->buffer); + skeleton->buffer = RID(); + skeleton->data.resize(0); + skeleton->uniform_set_mi = RID(); + } + + if (skeleton->size) { + skeleton->data.resize(skeleton->size * (skeleton->use_2d ? 8 : 12)); + skeleton->buffer = RD::get_singleton()->storage_buffer_create(skeleton->data.size() * sizeof(float)); + zeromem(skeleton->data.ptrw(), skeleton->data.size() * sizeof(float)); + + _skeleton_make_dirty(skeleton); + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(skeleton->buffer); + uniforms.push_back(u); + } + skeleton->uniform_set_mi = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SKELETON); + } + } + + skeleton->dependency.changed_notify(DEPENDENCY_CHANGED_SKELETON_DATA); +} + +int RendererStorageRD::skeleton_get_bone_count(RID p_skeleton) const { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + ERR_FAIL_COND_V(!skeleton, 0); + + return skeleton->size; +} + +void RendererStorageRD::skeleton_bone_set_transform(RID p_skeleton, int p_bone, const Transform &p_transform) { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + + ERR_FAIL_COND(!skeleton); + ERR_FAIL_INDEX(p_bone, skeleton->size); + ERR_FAIL_COND(skeleton->use_2d); + + float *dataptr = skeleton->data.ptrw() + p_bone * 12; + + dataptr[0] = p_transform.basis.elements[0][0]; + dataptr[1] = p_transform.basis.elements[0][1]; + dataptr[2] = p_transform.basis.elements[0][2]; + dataptr[3] = p_transform.origin.x; + dataptr[4] = p_transform.basis.elements[1][0]; + dataptr[5] = p_transform.basis.elements[1][1]; + dataptr[6] = p_transform.basis.elements[1][2]; + dataptr[7] = p_transform.origin.y; + dataptr[8] = p_transform.basis.elements[2][0]; + dataptr[9] = p_transform.basis.elements[2][1]; + dataptr[10] = p_transform.basis.elements[2][2]; + dataptr[11] = p_transform.origin.z; + + _skeleton_make_dirty(skeleton); +} + +Transform RendererStorageRD::skeleton_bone_get_transform(RID p_skeleton, int p_bone) const { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + + ERR_FAIL_COND_V(!skeleton, Transform()); + ERR_FAIL_INDEX_V(p_bone, skeleton->size, Transform()); + ERR_FAIL_COND_V(skeleton->use_2d, Transform()); + + const float *dataptr = skeleton->data.ptr() + p_bone * 12; + + Transform t; + + t.basis.elements[0][0] = dataptr[0]; + t.basis.elements[0][1] = dataptr[1]; + t.basis.elements[0][2] = dataptr[2]; + t.origin.x = dataptr[3]; + t.basis.elements[1][0] = dataptr[4]; + t.basis.elements[1][1] = dataptr[5]; + t.basis.elements[1][2] = dataptr[6]; + t.origin.y = dataptr[7]; + t.basis.elements[2][0] = dataptr[8]; + t.basis.elements[2][1] = dataptr[9]; + t.basis.elements[2][2] = dataptr[10]; + t.origin.z = dataptr[11]; + + return t; +} + +void RendererStorageRD::skeleton_bone_set_transform_2d(RID p_skeleton, int p_bone, const Transform2D &p_transform) { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + + ERR_FAIL_COND(!skeleton); + ERR_FAIL_INDEX(p_bone, skeleton->size); + ERR_FAIL_COND(!skeleton->use_2d); + + float *dataptr = skeleton->data.ptrw() + p_bone * 8; + + dataptr[0] = p_transform.elements[0][0]; + dataptr[1] = p_transform.elements[1][0]; + dataptr[2] = 0; + dataptr[3] = p_transform.elements[2][0]; + dataptr[4] = p_transform.elements[0][1]; + dataptr[5] = p_transform.elements[1][1]; + dataptr[6] = 0; + dataptr[7] = p_transform.elements[2][1]; + + _skeleton_make_dirty(skeleton); +} + +Transform2D RendererStorageRD::skeleton_bone_get_transform_2d(RID p_skeleton, int p_bone) const { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + + ERR_FAIL_COND_V(!skeleton, Transform2D()); + ERR_FAIL_INDEX_V(p_bone, skeleton->size, Transform2D()); + ERR_FAIL_COND_V(!skeleton->use_2d, Transform2D()); + + const float *dataptr = skeleton->data.ptr() + p_bone * 8; + + Transform2D t; + t.elements[0][0] = dataptr[0]; + t.elements[1][0] = dataptr[1]; + t.elements[2][0] = dataptr[3]; + t.elements[0][1] = dataptr[4]; + t.elements[1][1] = dataptr[5]; + t.elements[2][1] = dataptr[7]; + + return t; +} + +void RendererStorageRD::skeleton_set_base_transform_2d(RID p_skeleton, const Transform2D &p_base_transform) { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + + ERR_FAIL_COND(!skeleton->use_2d); + + skeleton->base_transform_2d = p_base_transform; +} + +void RendererStorageRD::_update_dirty_skeletons() { + while (skeleton_dirty_list) { + Skeleton *skeleton = skeleton_dirty_list; + + if (skeleton->size) { + RD::get_singleton()->buffer_update(skeleton->buffer, 0, skeleton->data.size() * sizeof(float), skeleton->data.ptr()); + } + + skeleton_dirty_list = skeleton->dirty_list; + + skeleton->dependency.changed_notify(DEPENDENCY_CHANGED_SKELETON_BONES); + + skeleton->version++; + + skeleton->dirty = false; + skeleton->dirty_list = nullptr; + } + + skeleton_dirty_list = nullptr; +} + +/* LIGHT */ + +void RendererStorageRD::_light_initialize(RID p_light, RS::LightType p_type) { + Light light; + light.type = p_type; + + light.param[RS::LIGHT_PARAM_ENERGY] = 1.0; + light.param[RS::LIGHT_PARAM_INDIRECT_ENERGY] = 1.0; + light.param[RS::LIGHT_PARAM_SPECULAR] = 0.5; + light.param[RS::LIGHT_PARAM_RANGE] = 1.0; + light.param[RS::LIGHT_PARAM_SIZE] = 0.0; + light.param[RS::LIGHT_PARAM_ATTENUATION] = 1.0; + light.param[RS::LIGHT_PARAM_SPOT_ANGLE] = 45; + light.param[RS::LIGHT_PARAM_SPOT_ATTENUATION] = 1.0; + light.param[RS::LIGHT_PARAM_SHADOW_MAX_DISTANCE] = 0; + light.param[RS::LIGHT_PARAM_SHADOW_SPLIT_1_OFFSET] = 0.1; + light.param[RS::LIGHT_PARAM_SHADOW_SPLIT_2_OFFSET] = 0.3; + light.param[RS::LIGHT_PARAM_SHADOW_SPLIT_3_OFFSET] = 0.6; + light.param[RS::LIGHT_PARAM_SHADOW_FADE_START] = 0.8; + light.param[RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS] = 1.0; + light.param[RS::LIGHT_PARAM_SHADOW_BIAS] = 0.02; + light.param[RS::LIGHT_PARAM_SHADOW_BLUR] = 0; + light.param[RS::LIGHT_PARAM_SHADOW_PANCAKE_SIZE] = 20.0; + light.param[RS::LIGHT_PARAM_SHADOW_VOLUMETRIC_FOG_FADE] = 0.1; + light.param[RS::LIGHT_PARAM_TRANSMITTANCE_BIAS] = 0.05; + + light_owner.initialize_rid(p_light, light); +} + +RID RendererStorageRD::directional_light_allocate() { + return light_owner.allocate_rid(); +} +void RendererStorageRD::directional_light_initialize(RID p_light) { + _light_initialize(p_light, RS::LIGHT_DIRECTIONAL); +} + +RID RendererStorageRD::omni_light_allocate() { + return light_owner.allocate_rid(); +} +void RendererStorageRD::omni_light_initialize(RID p_light) { + _light_initialize(p_light, RS::LIGHT_OMNI); +} + +RID RendererStorageRD::spot_light_allocate() { + return light_owner.allocate_rid(); +} +void RendererStorageRD::spot_light_initialize(RID p_light) { + _light_initialize(p_light, RS::LIGHT_SPOT); +} + +void RendererStorageRD::light_set_color(RID p_light, const Color &p_color) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->color = p_color; +} + +void RendererStorageRD::light_set_param(RID p_light, RS::LightParam p_param, float p_value) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + ERR_FAIL_INDEX(p_param, RS::LIGHT_PARAM_MAX); + + switch (p_param) { + case RS::LIGHT_PARAM_RANGE: + case RS::LIGHT_PARAM_SPOT_ANGLE: + case RS::LIGHT_PARAM_SHADOW_MAX_DISTANCE: + case RS::LIGHT_PARAM_SHADOW_SPLIT_1_OFFSET: + case RS::LIGHT_PARAM_SHADOW_SPLIT_2_OFFSET: + case RS::LIGHT_PARAM_SHADOW_SPLIT_3_OFFSET: + case RS::LIGHT_PARAM_SHADOW_NORMAL_BIAS: + case RS::LIGHT_PARAM_SHADOW_PANCAKE_SIZE: + case RS::LIGHT_PARAM_SHADOW_BIAS: { + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); + } break; + default: { + } + } + + light->param[p_param] = p_value; +} + +void RendererStorageRD::light_set_shadow(RID p_light, bool p_enabled) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + light->shadow = p_enabled; + + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +void RendererStorageRD::light_set_shadow_color(RID p_light, const Color &p_color) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + light->shadow_color = p_color; +} + +void RendererStorageRD::light_set_projector(RID p_light, RID p_texture) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + if (light->projector == p_texture) { + return; + } + + if (light->type != RS::LIGHT_DIRECTIONAL && light->projector.is_valid()) { + texture_remove_from_decal_atlas(light->projector, light->type == RS::LIGHT_OMNI); + } + + light->projector = p_texture; + + if (light->type != RS::LIGHT_DIRECTIONAL && light->projector.is_valid()) { + texture_add_to_decal_atlas(light->projector, light->type == RS::LIGHT_OMNI); + } +} + +void RendererStorageRD::light_set_negative(RID p_light, bool p_enable) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->negative = p_enable; +} + +void RendererStorageRD::light_set_cull_mask(RID p_light, uint32_t p_mask) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->cull_mask = p_mask; + + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +void RendererStorageRD::light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->reverse_cull = p_enabled; + + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +void RendererStorageRD::light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->bake_mode = p_bake_mode; + + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +void RendererStorageRD::light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->max_sdfgi_cascade = p_cascade; + + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +void RendererStorageRD::light_omni_set_shadow_mode(RID p_light, RS::LightOmniShadowMode p_mode) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->omni_shadow_mode = p_mode; + + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +RS::LightOmniShadowMode RendererStorageRD::light_omni_get_shadow_mode(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RS::LIGHT_OMNI_SHADOW_CUBE); + + return light->omni_shadow_mode; +} + +void RendererStorageRD::light_directional_set_shadow_mode(RID p_light, RS::LightDirectionalShadowMode p_mode) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->directional_shadow_mode = p_mode; + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +void RendererStorageRD::light_directional_set_blend_splits(RID p_light, bool p_enable) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->directional_blend_splits = p_enable; + light->version++; + light->dependency.changed_notify(DEPENDENCY_CHANGED_LIGHT); +} + +bool RendererStorageRD::light_directional_get_blend_splits(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, false); + + return light->directional_blend_splits; +} + +void RendererStorageRD::light_directional_set_sky_only(RID p_light, bool p_sky_only) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->directional_sky_only = p_sky_only; +} + +bool RendererStorageRD::light_directional_is_sky_only(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, false); + + return light->directional_sky_only; +} + +RS::LightDirectionalShadowMode RendererStorageRD::light_directional_get_shadow_mode(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL); + + return light->directional_shadow_mode; +} + +void RendererStorageRD::light_directional_set_shadow_depth_range_mode(RID p_light, RS::LightDirectionalShadowDepthRangeMode p_range_mode) { + Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + + light->directional_range_mode = p_range_mode; +} + +RS::LightDirectionalShadowDepthRangeMode RendererStorageRD::light_directional_get_shadow_depth_range_mode(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RS::LIGHT_DIRECTIONAL_SHADOW_DEPTH_RANGE_STABLE); + + return light->directional_range_mode; +} + +uint32_t RendererStorageRD::light_get_max_sdfgi_cascade(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, 0); + + return light->max_sdfgi_cascade; +} + +RS::LightBakeMode RendererStorageRD::light_get_bake_mode(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RS::LIGHT_BAKE_DISABLED); + + return light->bake_mode; +} + +uint64_t RendererStorageRD::light_get_version(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, 0); + + return light->version; +} + +AABB RendererStorageRD::light_get_aabb(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, AABB()); + + switch (light->type) { + case RS::LIGHT_SPOT: { + float len = light->param[RS::LIGHT_PARAM_RANGE]; + float size = Math::tan(Math::deg2rad(light->param[RS::LIGHT_PARAM_SPOT_ANGLE])) * len; + return AABB(Vector3(-size, -size, -len), Vector3(size * 2, size * 2, len)); + }; + case RS::LIGHT_OMNI: { + float r = light->param[RS::LIGHT_PARAM_RANGE]; + return AABB(-Vector3(r, r, r), Vector3(r, r, r) * 2); + }; + case RS::LIGHT_DIRECTIONAL: { + return AABB(); + }; + } + + ERR_FAIL_V(AABB()); +} + +/* REFLECTION PROBE */ + +RID RendererStorageRD::reflection_probe_allocate() { + return reflection_probe_owner.allocate_rid(); +} +void RendererStorageRD::reflection_probe_initialize(RID p_reflection_probe) { + reflection_probe_owner.initialize_rid(p_reflection_probe, ReflectionProbe()); +} + +void RendererStorageRD::reflection_probe_set_update_mode(RID p_probe, RS::ReflectionProbeUpdateMode p_mode) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->update_mode = p_mode; + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +void RendererStorageRD::reflection_probe_set_intensity(RID p_probe, float p_intensity) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->intensity = p_intensity; +} + +void RendererStorageRD::reflection_probe_set_ambient_mode(RID p_probe, RS::ReflectionProbeAmbientMode p_mode) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->ambient_mode = p_mode; +} + +void RendererStorageRD::reflection_probe_set_ambient_color(RID p_probe, const Color &p_color) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->ambient_color = p_color; +} + +void RendererStorageRD::reflection_probe_set_ambient_energy(RID p_probe, float p_energy) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->ambient_color_energy = p_energy; +} + +void RendererStorageRD::reflection_probe_set_max_distance(RID p_probe, float p_distance) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->max_distance = p_distance; + + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +void RendererStorageRD::reflection_probe_set_extents(RID p_probe, const Vector3 &p_extents) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + if (reflection_probe->extents == p_extents) { + return; + } + reflection_probe->extents = p_extents; + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +void RendererStorageRD::reflection_probe_set_origin_offset(RID p_probe, const Vector3 &p_offset) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->origin_offset = p_offset; + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +void RendererStorageRD::reflection_probe_set_as_interior(RID p_probe, bool p_enable) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->interior = p_enable; + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +void RendererStorageRD::reflection_probe_set_enable_box_projection(RID p_probe, bool p_enable) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->box_projection = p_enable; +} + +void RendererStorageRD::reflection_probe_set_enable_shadows(RID p_probe, bool p_enable) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->enable_shadows = p_enable; + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +void RendererStorageRD::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->cull_mask = p_layers; + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +void RendererStorageRD::reflection_probe_set_resolution(RID p_probe, int p_resolution) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + ERR_FAIL_COND(p_resolution < 32); + + reflection_probe->resolution = p_resolution; +} + +void RendererStorageRD::reflection_probe_set_lod_threshold(RID p_probe, float p_ratio) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->lod_threshold = p_ratio; + + reflection_probe->dependency.changed_notify(DEPENDENCY_CHANGED_REFLECTION_PROBE); +} + +AABB RendererStorageRD::reflection_probe_get_aabb(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, AABB()); + + AABB aabb; + aabb.position = -reflection_probe->extents; + aabb.size = reflection_probe->extents * 2.0; + + return aabb; +} + +RS::ReflectionProbeUpdateMode RendererStorageRD::reflection_probe_get_update_mode(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, RS::REFLECTION_PROBE_UPDATE_ALWAYS); + + return reflection_probe->update_mode; +} + +uint32_t RendererStorageRD::reflection_probe_get_cull_mask(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, 0); + + return reflection_probe->cull_mask; +} + +Vector3 RendererStorageRD::reflection_probe_get_extents(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, Vector3()); + + return reflection_probe->extents; +} + +Vector3 RendererStorageRD::reflection_probe_get_origin_offset(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, Vector3()); + + return reflection_probe->origin_offset; +} + +bool RendererStorageRD::reflection_probe_renders_shadows(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, false); + + return reflection_probe->enable_shadows; +} + +float RendererStorageRD::reflection_probe_get_origin_max_distance(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, 0); + + return reflection_probe->max_distance; +} + +float RendererStorageRD::reflection_probe_get_lod_threshold(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, 0); + + return reflection_probe->lod_threshold; +} + +int RendererStorageRD::reflection_probe_get_resolution(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, 0); + + return reflection_probe->resolution; +} + +float RendererStorageRD::reflection_probe_get_intensity(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, 0); + + return reflection_probe->intensity; +} + +bool RendererStorageRD::reflection_probe_is_interior(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, false); + + return reflection_probe->interior; +} + +bool RendererStorageRD::reflection_probe_is_box_projection(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, false); + + return reflection_probe->box_projection; +} + +RS::ReflectionProbeAmbientMode RendererStorageRD::reflection_probe_get_ambient_mode(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, RS::REFLECTION_PROBE_AMBIENT_DISABLED); + return reflection_probe->ambient_mode; +} + +Color RendererStorageRD::reflection_probe_get_ambient_color(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, Color()); + + return reflection_probe->ambient_color; +} +float RendererStorageRD::reflection_probe_get_ambient_color_energy(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, 0); + + return reflection_probe->ambient_color_energy; +} + +RID RendererStorageRD::decal_allocate() { + return decal_owner.allocate_rid(); +} +void RendererStorageRD::decal_initialize(RID p_decal) { + decal_owner.initialize_rid(p_decal, Decal()); +} + +void RendererStorageRD::decal_set_extents(RID p_decal, const Vector3 &p_extents) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->extents = p_extents; + decal->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +void RendererStorageRD::decal_set_texture(RID p_decal, RS::DecalTexture p_type, RID p_texture) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + ERR_FAIL_INDEX(p_type, RS::DECAL_TEXTURE_MAX); + + if (decal->textures[p_type] == p_texture) { + return; + } + + ERR_FAIL_COND(p_texture.is_valid() && !texture_owner.owns(p_texture)); + + if (decal->textures[p_type].is_valid() && texture_owner.owns(decal->textures[p_type])) { + texture_remove_from_decal_atlas(decal->textures[p_type]); + } + + decal->textures[p_type] = p_texture; + + if (decal->textures[p_type].is_valid()) { + texture_add_to_decal_atlas(decal->textures[p_type]); + } + + decal->dependency.changed_notify(DEPENDENCY_CHANGED_DECAL); +} + +void RendererStorageRD::decal_set_emission_energy(RID p_decal, float p_energy) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->emission_energy = p_energy; +} + +void RendererStorageRD::decal_set_albedo_mix(RID p_decal, float p_mix) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->albedo_mix = p_mix; +} + +void RendererStorageRD::decal_set_modulate(RID p_decal, const Color &p_modulate) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->modulate = p_modulate; +} + +void RendererStorageRD::decal_set_cull_mask(RID p_decal, uint32_t p_layers) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->cull_mask = p_layers; + decal->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +void RendererStorageRD::decal_set_distance_fade(RID p_decal, bool p_enabled, float p_begin, float p_length) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->distance_fade = p_enabled; + decal->distance_fade_begin = p_begin; + decal->distance_fade_length = p_length; +} + +void RendererStorageRD::decal_set_fade(RID p_decal, float p_above, float p_below) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->upper_fade = p_above; + decal->lower_fade = p_below; +} + +void RendererStorageRD::decal_set_normal_fade(RID p_decal, float p_fade) { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND(!decal); + decal->normal_fade = p_fade; +} + +AABB RendererStorageRD::decal_get_aabb(RID p_decal) const { + Decal *decal = decal_owner.getornull(p_decal); + ERR_FAIL_COND_V(!decal, AABB()); + + return AABB(-decal->extents, decal->extents * 2.0); +} + +RID RendererStorageRD::gi_probe_allocate() { + return gi_probe_owner.allocate_rid(); +} +void RendererStorageRD::gi_probe_initialize(RID p_gi_probe) { + gi_probe_owner.initialize_rid(p_gi_probe, GIProbe()); +} + +void RendererStorageRD::gi_probe_allocate_data(RID p_gi_probe, const Transform &p_to_cell_xform, const AABB &p_aabb, const Vector3i &p_octree_size, const Vector<uint8_t> &p_octree_cells, const Vector<uint8_t> &p_data_cells, const Vector<uint8_t> &p_distance_field, const Vector<int> &p_level_counts) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + if (gi_probe->octree_buffer.is_valid()) { + RD::get_singleton()->free(gi_probe->octree_buffer); + RD::get_singleton()->free(gi_probe->data_buffer); + if (gi_probe->sdf_texture.is_valid()) { + RD::get_singleton()->free(gi_probe->sdf_texture); + } + + gi_probe->sdf_texture = RID(); + gi_probe->octree_buffer = RID(); + gi_probe->data_buffer = RID(); + gi_probe->octree_buffer_size = 0; + gi_probe->data_buffer_size = 0; + gi_probe->cell_count = 0; + } + + gi_probe->to_cell_xform = p_to_cell_xform; + gi_probe->bounds = p_aabb; + gi_probe->octree_size = p_octree_size; + gi_probe->level_counts = p_level_counts; + + if (p_octree_cells.size()) { + ERR_FAIL_COND(p_octree_cells.size() % 32 != 0); //cells size must be a multiple of 32 + + uint32_t cell_count = p_octree_cells.size() / 32; + + ERR_FAIL_COND(p_data_cells.size() != (int)cell_count * 16); //see that data size matches + + gi_probe->cell_count = cell_count; + gi_probe->octree_buffer = RD::get_singleton()->storage_buffer_create(p_octree_cells.size(), p_octree_cells); + gi_probe->octree_buffer_size = p_octree_cells.size(); + gi_probe->data_buffer = RD::get_singleton()->storage_buffer_create(p_data_cells.size(), p_data_cells); + gi_probe->data_buffer_size = p_data_cells.size(); + + if (p_distance_field.size()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = gi_probe->octree_size.x; + tf.height = gi_probe->octree_size.y; + tf.depth = gi_probe->octree_size.z; + tf.texture_type = RD::TEXTURE_TYPE_3D; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + Vector<Vector<uint8_t>> s; + s.push_back(p_distance_field); + gi_probe->sdf_texture = RD::get_singleton()->texture_create(tf, RD::TextureView(), s); + } +#if 0 + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = gi_probe->octree_size.x; + tf.height = gi_probe->octree_size.y; + tf.depth = gi_probe->octree_size.z; + tf.type = RD::TEXTURE_TYPE_3D; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + tf.shareable_formats.push_back(RD::DATA_FORMAT_R8_UNORM); + tf.shareable_formats.push_back(RD::DATA_FORMAT_R8_UINT); + gi_probe->sdf_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + RID shared_tex; + { + RD::TextureView tv; + tv.format_override = RD::DATA_FORMAT_R8_UINT; + shared_tex = RD::get_singleton()->texture_create_shared(tv, gi_probe->sdf_texture); + } + //update SDF texture + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(gi_probe->octree_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(gi_probe->data_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + u.ids.push_back(shared_tex); + uniforms.push_back(u); + } + + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_sdf_shader_version_shader, 0); + + { + uint32_t push_constant[4] = { 0, 0, 0, 0 }; + + for (int i = 0; i < gi_probe->level_counts.size() - 1; i++) { + push_constant[0] += gi_probe->level_counts[i]; + } + push_constant[1] = push_constant[0] + gi_probe->level_counts[gi_probe->level_counts.size() - 1]; + + print_line("offset: " + itos(push_constant[0])); + print_line("size: " + itos(push_constant[1])); + //create SDF + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, giprobe_sdf_shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, push_constant, sizeof(uint32_t) * 4); + RD::get_singleton()->compute_list_dispatch(compute_list, gi_probe->octree_size.x / 4, gi_probe->octree_size.y / 4, gi_probe->octree_size.z / 4); + RD::get_singleton()->compute_list_end(); + } + + RD::get_singleton()->free(uniform_set); + RD::get_singleton()->free(shared_tex); + } +#endif + } + + gi_probe->version++; + gi_probe->data_version++; + + gi_probe->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); +} + +AABB RendererStorageRD::gi_probe_get_bounds(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, AABB()); + + return gi_probe->bounds; +} + +Vector3i RendererStorageRD::gi_probe_get_octree_size(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, Vector3i()); + return gi_probe->octree_size; +} + +Vector<uint8_t> RendererStorageRD::gi_probe_get_octree_cells(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, Vector<uint8_t>()); + + if (gi_probe->octree_buffer.is_valid()) { + return RD::get_singleton()->buffer_get_data(gi_probe->octree_buffer); + } + return Vector<uint8_t>(); +} + +Vector<uint8_t> RendererStorageRD::gi_probe_get_data_cells(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, Vector<uint8_t>()); + + if (gi_probe->data_buffer.is_valid()) { + return RD::get_singleton()->buffer_get_data(gi_probe->data_buffer); + } + return Vector<uint8_t>(); +} + +Vector<uint8_t> RendererStorageRD::gi_probe_get_distance_field(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, Vector<uint8_t>()); + + if (gi_probe->data_buffer.is_valid()) { + return RD::get_singleton()->texture_get_data(gi_probe->sdf_texture, 0); + } + return Vector<uint8_t>(); +} + +Vector<int> RendererStorageRD::gi_probe_get_level_counts(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, Vector<int>()); + + return gi_probe->level_counts; +} + +Transform RendererStorageRD::gi_probe_get_to_cell_xform(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, Transform()); + + return gi_probe->to_cell_xform; +} + +void RendererStorageRD::gi_probe_set_dynamic_range(RID p_gi_probe, float p_range) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->dynamic_range = p_range; + gi_probe->version++; +} + +float RendererStorageRD::gi_probe_get_dynamic_range(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + + return gi_probe->dynamic_range; +} + +void RendererStorageRD::gi_probe_set_propagation(RID p_gi_probe, float p_range) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->propagation = p_range; + gi_probe->version++; +} + +float RendererStorageRD::gi_probe_get_propagation(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->propagation; +} + +void RendererStorageRD::gi_probe_set_energy(RID p_gi_probe, float p_energy) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->energy = p_energy; +} + +float RendererStorageRD::gi_probe_get_energy(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->energy; +} + +void RendererStorageRD::gi_probe_set_ao(RID p_gi_probe, float p_ao) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->ao = p_ao; +} + +float RendererStorageRD::gi_probe_get_ao(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->ao; +} + +void RendererStorageRD::gi_probe_set_ao_size(RID p_gi_probe, float p_strength) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->ao_size = p_strength; +} + +float RendererStorageRD::gi_probe_get_ao_size(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->ao_size; +} + +void RendererStorageRD::gi_probe_set_bias(RID p_gi_probe, float p_bias) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->bias = p_bias; +} + +float RendererStorageRD::gi_probe_get_bias(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->bias; +} + +void RendererStorageRD::gi_probe_set_normal_bias(RID p_gi_probe, float p_normal_bias) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->normal_bias = p_normal_bias; +} + +float RendererStorageRD::gi_probe_get_normal_bias(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->normal_bias; +} + +void RendererStorageRD::gi_probe_set_anisotropy_strength(RID p_gi_probe, float p_strength) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->anisotropy_strength = p_strength; +} + +float RendererStorageRD::gi_probe_get_anisotropy_strength(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->anisotropy_strength; +} + +void RendererStorageRD::gi_probe_set_interior(RID p_gi_probe, bool p_enable) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->interior = p_enable; +} + +void RendererStorageRD::gi_probe_set_use_two_bounces(RID p_gi_probe, bool p_enable) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND(!gi_probe); + + gi_probe->use_two_bounces = p_enable; + gi_probe->version++; +} + +bool RendererStorageRD::gi_probe_is_using_two_bounces(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, false); + return gi_probe->use_two_bounces; +} + +bool RendererStorageRD::gi_probe_is_interior(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->interior; +} + +uint32_t RendererStorageRD::gi_probe_get_version(RID p_gi_probe) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->version; +} + +uint32_t RendererStorageRD::gi_probe_get_data_version(RID p_gi_probe) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, 0); + return gi_probe->data_version; +} + +RID RendererStorageRD::gi_probe_get_octree_buffer(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, RID()); + return gi_probe->octree_buffer; +} + +RID RendererStorageRD::gi_probe_get_data_buffer(RID p_gi_probe) const { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, RID()); + return gi_probe->data_buffer; +} + +RID RendererStorageRD::gi_probe_get_sdf_texture(RID p_gi_probe) { + GIProbe *gi_probe = gi_probe_owner.getornull(p_gi_probe); + ERR_FAIL_COND_V(!gi_probe, RID()); + + return gi_probe->sdf_texture; +} + +/* LIGHTMAP API */ + +RID RendererStorageRD::lightmap_allocate() { + return lightmap_owner.allocate_rid(); +} + +void RendererStorageRD::lightmap_initialize(RID p_lightmap) { + lightmap_owner.initialize_rid(p_lightmap, Lightmap()); +} + +void RendererStorageRD::lightmap_set_textures(RID p_lightmap, RID p_light, bool p_uses_spherical_haromics) { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND(!lm); + + lightmap_array_version++; + + //erase lightmap users + if (lm->light_texture.is_valid()) { + Texture *t = texture_owner.getornull(lm->light_texture); + if (t) { + t->lightmap_users.erase(p_lightmap); + } + } + + Texture *t = texture_owner.getornull(p_light); + lm->light_texture = p_light; + lm->uses_spherical_harmonics = p_uses_spherical_haromics; + + RID default_2d_array = default_rd_textures[DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE]; + if (!t) { + if (using_lightmap_array) { + if (lm->array_index >= 0) { + lightmap_textures.write[lm->array_index] = default_2d_array; + lm->array_index = -1; + } + } + + return; + } + + t->lightmap_users.insert(p_lightmap); + + if (using_lightmap_array) { + if (lm->array_index < 0) { + //not in array, try to put in array + for (int i = 0; i < lightmap_textures.size(); i++) { + if (lightmap_textures[i] == default_2d_array) { + lm->array_index = i; + break; + } + } + } + ERR_FAIL_COND_MSG(lm->array_index < 0, "Maximum amount of lightmaps in use (" + itos(lightmap_textures.size()) + ") has been exceeded, lightmap will nod display properly."); + + lightmap_textures.write[lm->array_index] = t->rd_texture; + } +} + +void RendererStorageRD::lightmap_set_probe_bounds(RID p_lightmap, const AABB &p_bounds) { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND(!lm); + lm->bounds = p_bounds; +} + +void RendererStorageRD::lightmap_set_probe_interior(RID p_lightmap, bool p_interior) { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND(!lm); + lm->interior = p_interior; +} + +void RendererStorageRD::lightmap_set_probe_capture_data(RID p_lightmap, const PackedVector3Array &p_points, const PackedColorArray &p_point_sh, const PackedInt32Array &p_tetrahedra, const PackedInt32Array &p_bsp_tree) { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND(!lm); + + if (p_points.size()) { + ERR_FAIL_COND(p_points.size() * 9 != p_point_sh.size()); + ERR_FAIL_COND((p_tetrahedra.size() % 4) != 0); + ERR_FAIL_COND((p_bsp_tree.size() % 6) != 0); + } + + lm->points = p_points; + lm->bsp_tree = p_bsp_tree; + lm->point_sh = p_point_sh; + lm->tetrahedra = p_tetrahedra; +} + +PackedVector3Array RendererStorageRD::lightmap_get_probe_capture_points(RID p_lightmap) const { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, PackedVector3Array()); + + return lm->points; +} + +PackedColorArray RendererStorageRD::lightmap_get_probe_capture_sh(RID p_lightmap) const { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, PackedColorArray()); + return lm->point_sh; +} + +PackedInt32Array RendererStorageRD::lightmap_get_probe_capture_tetrahedra(RID p_lightmap) const { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, PackedInt32Array()); + return lm->tetrahedra; +} + +PackedInt32Array RendererStorageRD::lightmap_get_probe_capture_bsp_tree(RID p_lightmap) const { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, PackedInt32Array()); + return lm->bsp_tree; +} + +void RendererStorageRD::lightmap_set_probe_capture_update_speed(float p_speed) { + lightmap_probe_capture_update_speed = p_speed; +} + +void RendererStorageRD::lightmap_tap_sh_light(RID p_lightmap, const Vector3 &p_point, Color *r_sh) { + Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND(!lm); + + for (int i = 0; i < 9; i++) { + r_sh[i] = Color(0, 0, 0, 0); + } + + if (!lm->points.size() || !lm->bsp_tree.size() || !lm->tetrahedra.size()) { + return; + } + + static_assert(sizeof(Lightmap::BSP) == 24); + + const Lightmap::BSP *bsp = (const Lightmap::BSP *)lm->bsp_tree.ptr(); + int32_t node = 0; + while (node >= 0) { + if (Plane(bsp[node].plane[0], bsp[node].plane[1], bsp[node].plane[2], bsp[node].plane[3]).is_point_over(p_point)) { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND(bsp[node].over >= 0 && bsp[node].over < node); +#endif + + node = bsp[node].over; + } else { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND(bsp[node].under >= 0 && bsp[node].under < node); +#endif + node = bsp[node].under; + } + } + + if (node == Lightmap::BSP::EMPTY_LEAF) { + return; //nothing could be done + } + + node = ABS(node) - 1; + + uint32_t *tetrahedron = (uint32_t *)&lm->tetrahedra[node * 4]; + Vector3 points[4] = { lm->points[tetrahedron[0]], lm->points[tetrahedron[1]], lm->points[tetrahedron[2]], lm->points[tetrahedron[3]] }; + const Color *sh_colors[4]{ &lm->point_sh[tetrahedron[0] * 9], &lm->point_sh[tetrahedron[1] * 9], &lm->point_sh[tetrahedron[2] * 9], &lm->point_sh[tetrahedron[3] * 9] }; + Color barycentric = Geometry3D::tetrahedron_get_barycentric_coords(points[0], points[1], points[2], points[3], p_point); + + for (int i = 0; i < 4; i++) { + float c = CLAMP(barycentric[i], 0.0, 1.0); + for (int j = 0; j < 9; j++) { + r_sh[j] += sh_colors[i][j] * c; + } + } +} + +bool RendererStorageRD::lightmap_is_interior(RID p_lightmap) const { + const Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, false); + return lm->interior; +} + +AABB RendererStorageRD::lightmap_get_aabb(RID p_lightmap) const { + const Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, AABB()); + return lm->bounds; +} + +/* RENDER TARGET API */ + +void RendererStorageRD::_clear_render_target(RenderTarget *rt) { + //free in reverse dependency order + if (rt->framebuffer.is_valid()) { + RD::get_singleton()->free(rt->framebuffer); + rt->framebuffer_uniform_set = RID(); //chain deleted + } + + if (rt->color.is_valid()) { + RD::get_singleton()->free(rt->color); + } + + if (rt->backbuffer.is_valid()) { + RD::get_singleton()->free(rt->backbuffer); + rt->backbuffer = RID(); + for (int i = 0; i < rt->backbuffer_mipmaps.size(); i++) { + //just erase copies, since the rest are erased by dependency + RD::get_singleton()->free(rt->backbuffer_mipmaps[i].mipmap_copy); + } + rt->backbuffer_mipmaps.clear(); + rt->backbuffer_uniform_set = RID(); //chain deleted + } + + _render_target_clear_sdf(rt); + + rt->framebuffer = RID(); + rt->color = RID(); +} + +void RendererStorageRD::_update_render_target(RenderTarget *rt) { + if (rt->texture.is_null()) { + //create a placeholder until updated + rt->texture = texture_allocate(); + texture_2d_placeholder_initialize(rt->texture); + Texture *tex = texture_owner.getornull(rt->texture); + tex->is_render_target = true; + } + + _clear_render_target(rt); + + if (rt->size.width == 0 || rt->size.height == 0) { + return; + } + //until we implement support for HDR monitors (and render target is attached to screen), this is enough. + rt->color_format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + rt->color_format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; + rt->image_format = rt->flags[RENDER_TARGET_TRANSPARENT] ? Image::FORMAT_RGBA8 : Image::FORMAT_RGB8; + + RD::TextureFormat rd_format; + RD::TextureView rd_view; + { //attempt register + rd_format.format = rt->color_format; + rd_format.width = rt->size.width; + rd_format.height = rt->size.height; + rd_format.depth = 1; + rd_format.array_layers = 1; + rd_format.mipmaps = 1; + rd_format.texture_type = RD::TEXTURE_TYPE_2D; + rd_format.samples = RD::TEXTURE_SAMPLES_1; + rd_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + rd_format.shareable_formats.push_back(rt->color_format); + rd_format.shareable_formats.push_back(rt->color_format_srgb); + } + + rt->color = RD::get_singleton()->texture_create(rd_format, rd_view); + ERR_FAIL_COND(rt->color.is_null()); + + Vector<RID> fb_textures; + fb_textures.push_back(rt->color); + rt->framebuffer = RD::get_singleton()->framebuffer_create(fb_textures); + if (rt->framebuffer.is_null()) { + _clear_render_target(rt); + ERR_FAIL_COND(rt->framebuffer.is_null()); + } + + { //update texture + + Texture *tex = texture_owner.getornull(rt->texture); + + //free existing textures + if (RD::get_singleton()->texture_is_valid(tex->rd_texture)) { + RD::get_singleton()->free(tex->rd_texture); + } + if (RD::get_singleton()->texture_is_valid(tex->rd_texture_srgb)) { + RD::get_singleton()->free(tex->rd_texture_srgb); + } + + tex->rd_texture = RID(); + tex->rd_texture_srgb = RID(); + + //create shared textures to the color buffer, + //so transparent can be supported + RD::TextureView view; + view.format_override = rt->color_format; + if (!rt->flags[RENDER_TARGET_TRANSPARENT]) { + view.swizzle_a = RD::TEXTURE_SWIZZLE_ONE; + } + tex->rd_texture = RD::get_singleton()->texture_create_shared(view, rt->color); + if (rt->color_format_srgb != RD::DATA_FORMAT_MAX) { + view.format_override = rt->color_format_srgb; + tex->rd_texture_srgb = RD::get_singleton()->texture_create_shared(view, rt->color); + } + tex->rd_view = view; + tex->width = rt->size.width; + tex->height = rt->size.height; + tex->width_2d = rt->size.width; + tex->height_2d = rt->size.height; + tex->rd_format = rt->color_format; + tex->rd_format_srgb = rt->color_format_srgb; + tex->format = rt->image_format; + + Vector<RID> proxies = tex->proxies; //make a copy, since update may change it + for (int i = 0; i < proxies.size(); i++) { + texture_proxy_update(proxies[i], rt->texture); + } + } +} + +void RendererStorageRD::_create_render_target_backbuffer(RenderTarget *rt) { + ERR_FAIL_COND(rt->backbuffer.is_valid()); + + uint32_t mipmaps_required = Image::get_image_required_mipmaps(rt->size.width, rt->size.height, Image::FORMAT_RGBA8); + RD::TextureFormat tf; + tf.format = rt->color_format; + tf.width = rt->size.width; + tf.height = rt->size.height; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + tf.mipmaps = mipmaps_required; + + rt->backbuffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rt->backbuffer_mipmap0 = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, 0); + + { + Vector<RID> fb_tex; + fb_tex.push_back(rt->backbuffer_mipmap0); + rt->backbuffer_fb = RD::get_singleton()->framebuffer_create(fb_tex); + } + + if (rt->framebuffer_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rt->framebuffer_uniform_set)) { + //the new one will require the backbuffer. + RD::get_singleton()->free(rt->framebuffer_uniform_set); + rt->framebuffer_uniform_set = RID(); + } + //create mipmaps + for (uint32_t i = 1; i < mipmaps_required; i++) { + RenderTarget::BackbufferMipmap mm; + { + mm.mipmap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, i); + } + + { + Size2 mm_size = Image::get_image_mipmap_size(tf.width, tf.height, Image::FORMAT_RGBA8, i); + + RD::TextureFormat mmtf = tf; + mmtf.width = mm_size.width; + mmtf.height = mm_size.height; + mmtf.mipmaps = 1; + + mm.mipmap_copy = RD::get_singleton()->texture_create(mmtf, RD::TextureView()); + } + + rt->backbuffer_mipmaps.push_back(mm); + } +} + +RID RendererStorageRD::render_target_create() { + RenderTarget render_target; + + render_target.was_used = false; + render_target.clear_requested = false; + + for (int i = 0; i < RENDER_TARGET_FLAG_MAX; i++) { + render_target.flags[i] = false; + } + _update_render_target(&render_target); + return render_target_owner.make_rid(render_target); +} + +void RendererStorageRD::render_target_set_position(RID p_render_target, int p_x, int p_y) { + //unused for this render target +} + +void RendererStorageRD::render_target_set_size(RID p_render_target, int p_width, int p_height) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + rt->size.x = p_width; + rt->size.y = p_height; + _update_render_target(rt); +} + +RID RendererStorageRD::render_target_get_texture(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + return rt->texture; +} + +void RendererStorageRD::render_target_set_external_texture(RID p_render_target, unsigned int p_texture_id) { +} + +void RendererStorageRD::render_target_set_flag(RID p_render_target, RenderTargetFlags p_flag, bool p_value) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + rt->flags[p_flag] = p_value; + _update_render_target(rt); +} + +bool RendererStorageRD::render_target_was_used(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, false); + return rt->was_used; +} + +void RendererStorageRD::render_target_set_as_unused(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + rt->was_used = false; +} + +Size2 RendererStorageRD::render_target_get_size(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, Size2()); + + return rt->size; +} + +RID RendererStorageRD::render_target_get_rd_framebuffer(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + return rt->framebuffer; +} + +RID RendererStorageRD::render_target_get_rd_texture(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + return rt->color; +} + +RID RendererStorageRD::render_target_get_rd_backbuffer(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + return rt->backbuffer; +} + +RID RendererStorageRD::render_target_get_rd_backbuffer_framebuffer(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + if (!rt->backbuffer.is_valid()) { + _create_render_target_backbuffer(rt); + } + + return rt->backbuffer_fb; +} + +void RendererStorageRD::render_target_request_clear(RID p_render_target, const Color &p_clear_color) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + rt->clear_requested = true; + rt->clear_color = p_clear_color; +} + +bool RendererStorageRD::render_target_is_clear_requested(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, false); + return rt->clear_requested; +} + +Color RendererStorageRD::render_target_get_clear_request_color(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, Color()); + return rt->clear_color; +} + +void RendererStorageRD::render_target_disable_clear_request(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + rt->clear_requested = false; +} + +void RendererStorageRD::render_target_do_clear_request(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + if (!rt->clear_requested) { + return; + } + Vector<Color> clear_colors; + clear_colors.push_back(rt->clear_color); + RD::get_singleton()->draw_list_begin(rt->framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, clear_colors); + RD::get_singleton()->draw_list_end(); + rt->clear_requested = false; +} + +void RendererStorageRD::render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + if (rt->sdf_oversize == p_size && rt->sdf_scale == p_scale) { + return; + } + + rt->sdf_oversize = p_size; + rt->sdf_scale = p_scale; + + _render_target_clear_sdf(rt); +} + +Rect2i RendererStorageRD::_render_target_get_sdf_rect(const RenderTarget *rt) const { + Size2i margin; + int scale; + switch (rt->sdf_oversize) { + case RS::VIEWPORT_SDF_OVERSIZE_100_PERCENT: { + scale = 100; + } break; + case RS::VIEWPORT_SDF_OVERSIZE_120_PERCENT: { + scale = 120; + } break; + case RS::VIEWPORT_SDF_OVERSIZE_150_PERCENT: { + scale = 150; + } break; + case RS::VIEWPORT_SDF_OVERSIZE_200_PERCENT: { + scale = 200; + } break; + default: { + } + } + + margin = (rt->size * scale / 100) - rt->size; + + Rect2i r(Vector2i(), rt->size); + r.position -= margin; + r.size += margin * 2; + + return r; +} + +Rect2i RendererStorageRD::render_target_get_sdf_rect(RID p_render_target) const { + const RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, Rect2i()); + + return _render_target_get_sdf_rect(rt); +} + +RID RendererStorageRD::render_target_get_sdf_texture(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + if (rt->sdf_buffer_read.is_null()) { + // no texture, create a dummy one for the 2D uniform set + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = 4; + tformat.height = 4; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_2D; + + Vector<uint8_t> pv; + pv.resize(16 * 4); + zeromem(pv.ptrw(), 16 * 4); + Vector<Vector<uint8_t>> vpv; + + rt->sdf_buffer_read = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + + return rt->sdf_buffer_read; +} + +void RendererStorageRD::_render_target_allocate_sdf(RenderTarget *rt) { + ERR_FAIL_COND(rt->sdf_buffer_write_fb.is_valid()); + if (rt->sdf_buffer_read.is_valid()) { + RD::get_singleton()->free(rt->sdf_buffer_read); + rt->sdf_buffer_read = RID(); + } + + Size2i size = _render_target_get_sdf_rect(rt).size; + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8_UNORM; + tformat.width = size.width; + tformat.height = size.height; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_2D; + + rt->sdf_buffer_write = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + + { + Vector<RID> write_fb; + write_fb.push_back(rt->sdf_buffer_write); + rt->sdf_buffer_write_fb = RD::get_singleton()->framebuffer_create(write_fb); + } + + int scale; + switch (rt->sdf_scale) { + case RS::VIEWPORT_SDF_SCALE_100_PERCENT: { + scale = 100; + } break; + case RS::VIEWPORT_SDF_SCALE_50_PERCENT: { + scale = 50; + } break; + case RS::VIEWPORT_SDF_SCALE_25_PERCENT: { + scale = 25; + } break; + default: { + scale = 100; + } break; + } + + rt->process_size = size * scale / 100; + rt->process_size.x = MAX(rt->process_size.x, 1); + rt->process_size.y = MAX(rt->process_size.y, 1); + + tformat.format = RD::DATA_FORMAT_R16G16_UINT; + tformat.width = rt->process_size.width; + tformat.height = rt->process_size.height; + tformat.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; + + rt->sdf_buffer_process[0] = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + rt->sdf_buffer_process[1] = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + + tformat.format = RD::DATA_FORMAT_R16_UNORM; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + + rt->sdf_buffer_read = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(rt->sdf_buffer_write); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(rt->sdf_buffer_read); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + u.ids.push_back(rt->sdf_buffer_process[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 4; + u.ids.push_back(rt->sdf_buffer_process[1]); + uniforms.push_back(u); + } + + rt->sdf_buffer_process_uniform_sets[0] = RD::get_singleton()->uniform_set_create(uniforms, rt_sdf.shader.version_get_shader(rt_sdf.shader_version, 0), 0); + SWAP(uniforms.write[2].ids.write[0], uniforms.write[3].ids.write[0]); + rt->sdf_buffer_process_uniform_sets[1] = RD::get_singleton()->uniform_set_create(uniforms, rt_sdf.shader.version_get_shader(rt_sdf.shader_version, 0), 0); + } +} + +void RendererStorageRD::_render_target_clear_sdf(RenderTarget *rt) { + if (rt->sdf_buffer_read.is_valid()) { + RD::get_singleton()->free(rt->sdf_buffer_read); + rt->sdf_buffer_read = RID(); + } + if (rt->sdf_buffer_write_fb.is_valid()) { + RD::get_singleton()->free(rt->sdf_buffer_write); + RD::get_singleton()->free(rt->sdf_buffer_process[0]); + RD::get_singleton()->free(rt->sdf_buffer_process[1]); + rt->sdf_buffer_write = RID(); + rt->sdf_buffer_write_fb = RID(); + rt->sdf_buffer_process[0] = RID(); + rt->sdf_buffer_process[1] = RID(); + rt->sdf_buffer_process_uniform_sets[0] = RID(); + rt->sdf_buffer_process_uniform_sets[1] = RID(); + } +} + +RID RendererStorageRD::render_target_get_sdf_framebuffer(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + if (rt->sdf_buffer_write_fb.is_null()) { + _render_target_allocate_sdf(rt); + } + + return rt->sdf_buffer_write_fb; +} +void RendererStorageRD::render_target_sdf_process(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + ERR_FAIL_COND(rt->sdf_buffer_write_fb.is_null()); + + RenderTargetSDF::PushConstant push_constant; + + Rect2i r = _render_target_get_sdf_rect(rt); + + push_constant.size[0] = r.size.width; + push_constant.size[1] = r.size.height; + push_constant.stride = 0; + push_constant.shift = 0; + push_constant.base_size[0] = r.size.width; + push_constant.base_size[1] = r.size.height; + + bool shrink = false; + + switch (rt->sdf_scale) { + case RS::VIEWPORT_SDF_SCALE_50_PERCENT: { + push_constant.size[0] >>= 1; + push_constant.size[1] >>= 1; + push_constant.shift = 1; + shrink = true; + } break; + case RS::VIEWPORT_SDF_SCALE_25_PERCENT: { + push_constant.size[0] >>= 2; + push_constant.size[1] >>= 2; + push_constant.shift = 2; + shrink = true; + } break; + default: { + }; + } + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + /* Load */ + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[shrink ? RenderTargetSDF::SHADER_LOAD_SHRINK : RenderTargetSDF::SHADER_LOAD]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[1], 0); //fill [0] + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); + + /* Process */ + + int stride = nearest_power_of_2_templated(MAX(push_constant.size[0], push_constant.size[1]) / 2); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[RenderTargetSDF::SHADER_PROCESS]); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + bool swap = false; + + //jumpflood + while (stride > 0) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); + push_constant.stride = stride; + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); + stride /= 2; + swap = !swap; + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + + /* Store */ + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[shrink ? RenderTargetSDF::SHADER_STORE_SHRINK : RenderTargetSDF::SHADER_STORE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); + + RD::get_singleton()->compute_list_end(); +} + +void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, const Rect2i &p_region, bool p_gen_mipmaps) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + if (!rt->backbuffer.is_valid()) { + _create_render_target_backbuffer(rt); + } + + Rect2i region; + if (p_region == Rect2i()) { + region.size = rt->size; + } else { + region = Rect2i(Size2i(), rt->size).intersection(p_region); + if (region.size == Size2i()) { + return; //nothing to do + } + } + + //single texture copy for backbuffer + //RD::get_singleton()->texture_copy(rt->color, rt->backbuffer_mipmap0, Vector3(region.position.x, region.position.y, 0), Vector3(region.position.x, region.position.y, 0), Vector3(region.size.x, region.size.y, 1), 0, 0, 0, 0, true); + effects.copy_to_rect(rt->color, rt->backbuffer_mipmap0, region, false, false, false, true, true); + + if (!p_gen_mipmaps) { + return; + } + + //then mipmap blur + RID prev_texture = rt->color; //use color, not backbuffer, as bb has mipmaps. + + for (int i = 0; i < rt->backbuffer_mipmaps.size(); i++) { + region.position.x >>= 1; + region.position.y >>= 1; + region.size.x = MAX(1, region.size.x >> 1); + region.size.y = MAX(1, region.size.y >> 1); + + const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i]; + effects.gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true); + prev_texture = mm.mipmap; + } +} + +void RendererStorageRD::render_target_clear_back_buffer(RID p_render_target, const Rect2i &p_region, const Color &p_color) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + if (!rt->backbuffer.is_valid()) { + _create_render_target_backbuffer(rt); + } + + Rect2i region; + if (p_region == Rect2i()) { + region.size = rt->size; + } else { + region = Rect2i(Size2i(), rt->size).intersection(p_region); + if (region.size == Size2i()) { + return; //nothing to do + } + } + + //single texture copy for backbuffer + effects.set_color(rt->backbuffer_mipmap0, p_color, region, true); +} + +void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_target, const Rect2i &p_region) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + if (!rt->backbuffer.is_valid()) { + _create_render_target_backbuffer(rt); + } + + Rect2i region; + if (p_region == Rect2i()) { + region.size = rt->size; + } else { + region = Rect2i(Size2i(), rt->size).intersection(p_region); + if (region.size == Size2i()) { + return; //nothing to do + } + } + + //then mipmap blur + RID prev_texture = rt->backbuffer_mipmap0; + + for (int i = 0; i < rt->backbuffer_mipmaps.size(); i++) { + region.position.x >>= 1; + region.position.y >>= 1; + region.size.x = MAX(1, region.size.x >> 1); + region.size.y = MAX(1, region.size.y >> 1); + + const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i]; + effects.gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true); + prev_texture = mm.mipmap; + } +} + +RID RendererStorageRD::render_target_get_framebuffer_uniform_set(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + return rt->framebuffer_uniform_set; +} +RID RendererStorageRD::render_target_get_backbuffer_uniform_set(RID p_render_target) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + return rt->backbuffer_uniform_set; +} + +void RendererStorageRD::render_target_set_framebuffer_uniform_set(RID p_render_target, RID p_uniform_set) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + rt->framebuffer_uniform_set = p_uniform_set; +} +void RendererStorageRD::render_target_set_backbuffer_uniform_set(RID p_render_target, RID p_uniform_set) { + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + rt->backbuffer_uniform_set = p_uniform_set; +} + +void RendererStorageRD::base_update_dependency(RID p_base, DependencyTracker *p_instance) { + if (mesh_owner.owns(p_base)) { + Mesh *mesh = mesh_owner.getornull(p_base); + p_instance->update_dependency(&mesh->dependency); + } else if (multimesh_owner.owns(p_base)) { + MultiMesh *multimesh = multimesh_owner.getornull(p_base); + p_instance->update_dependency(&multimesh->dependency); + if (multimesh->mesh.is_valid()) { + base_update_dependency(multimesh->mesh, p_instance); + } + } else if (reflection_probe_owner.owns(p_base)) { + ReflectionProbe *rp = reflection_probe_owner.getornull(p_base); + p_instance->update_dependency(&rp->dependency); + } else if (decal_owner.owns(p_base)) { + Decal *decal = decal_owner.getornull(p_base); + p_instance->update_dependency(&decal->dependency); + } else if (gi_probe_owner.owns(p_base)) { + GIProbe *gip = gi_probe_owner.getornull(p_base); + p_instance->update_dependency(&gip->dependency); + } else if (lightmap_owner.owns(p_base)) { + Lightmap *lm = lightmap_owner.getornull(p_base); + p_instance->update_dependency(&lm->dependency); + } else if (light_owner.owns(p_base)) { + Light *l = light_owner.getornull(p_base); + p_instance->update_dependency(&l->dependency); + } else if (particles_owner.owns(p_base)) { + Particles *p = particles_owner.getornull(p_base); + p_instance->update_dependency(&p->dependency); + } else if (particles_collision_owner.owns(p_base)) { + ParticlesCollision *pc = particles_collision_owner.getornull(p_base); + p_instance->update_dependency(&pc->dependency); + } +} + +void RendererStorageRD::skeleton_update_dependency(RID p_skeleton, DependencyTracker *p_instance) { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + ERR_FAIL_COND(!skeleton); + + p_instance->update_dependency(&skeleton->dependency); +} + +RS::InstanceType RendererStorageRD::get_base_type(RID p_rid) const { + if (mesh_owner.owns(p_rid)) { + return RS::INSTANCE_MESH; + } + if (multimesh_owner.owns(p_rid)) { + return RS::INSTANCE_MULTIMESH; + } + if (reflection_probe_owner.owns(p_rid)) { + return RS::INSTANCE_REFLECTION_PROBE; + } + if (decal_owner.owns(p_rid)) { + return RS::INSTANCE_DECAL; + } + if (gi_probe_owner.owns(p_rid)) { + return RS::INSTANCE_GI_PROBE; + } + if (light_owner.owns(p_rid)) { + return RS::INSTANCE_LIGHT; + } + if (lightmap_owner.owns(p_rid)) { + return RS::INSTANCE_LIGHTMAP; + } + if (particles_owner.owns(p_rid)) { + return RS::INSTANCE_PARTICLES; + } + if (particles_collision_owner.owns(p_rid)) { + return RS::INSTANCE_PARTICLES_COLLISION; + } + + return RS::INSTANCE_NONE; +} + +void RendererStorageRD::texture_add_to_decal_atlas(RID p_texture, bool p_panorama_to_dp) { + if (!decal_atlas.textures.has(p_texture)) { + DecalAtlas::Texture t; + t.users = 1; + t.panorama_to_dp_users = p_panorama_to_dp ? 1 : 0; + decal_atlas.textures[p_texture] = t; + decal_atlas.dirty = true; + } else { + DecalAtlas::Texture *t = decal_atlas.textures.getptr(p_texture); + t->users++; + if (p_panorama_to_dp) { + t->panorama_to_dp_users++; + } + } +} + +void RendererStorageRD::texture_remove_from_decal_atlas(RID p_texture, bool p_panorama_to_dp) { + DecalAtlas::Texture *t = decal_atlas.textures.getptr(p_texture); + ERR_FAIL_COND(!t); + t->users--; + if (p_panorama_to_dp) { + ERR_FAIL_COND(t->panorama_to_dp_users == 0); + t->panorama_to_dp_users--; + } + if (t->users == 0) { + decal_atlas.textures.erase(p_texture); + //do not mark it dirty, there is no need to since it remains working + } +} + +RID RendererStorageRD::decal_atlas_get_texture() const { + return decal_atlas.texture; +} + +RID RendererStorageRD::decal_atlas_get_texture_srgb() const { + return decal_atlas.texture_srgb; +} + +void RendererStorageRD::_update_decal_atlas() { + if (!decal_atlas.dirty) { + return; //nothing to do + } + + decal_atlas.dirty = false; + + if (decal_atlas.texture.is_valid()) { + RD::get_singleton()->free(decal_atlas.texture); + decal_atlas.texture = RID(); + decal_atlas.texture_srgb = RID(); + decal_atlas.texture_mipmaps.clear(); + } + + int border = 1 << decal_atlas.mipmaps; + + if (decal_atlas.textures.size()) { + //generate atlas + Vector<DecalAtlas::SortItem> itemsv; + itemsv.resize(decal_atlas.textures.size()); + int base_size = 8; + const RID *K = nullptr; + + int idx = 0; + while ((K = decal_atlas.textures.next(K))) { + DecalAtlas::SortItem &si = itemsv.write[idx]; + + Texture *src_tex = texture_owner.getornull(*K); + + si.size.width = (src_tex->width / border) + 1; + si.size.height = (src_tex->height / border) + 1; + si.pixel_size = Size2i(src_tex->width, src_tex->height); + + if (base_size < si.size.width) { + base_size = nearest_power_of_2_templated(si.size.width); + } + + si.texture = *K; + idx++; + } + + //sort items by size + itemsv.sort(); + + //attempt to create atlas + int item_count = itemsv.size(); + DecalAtlas::SortItem *items = itemsv.ptrw(); + + int atlas_height = 0; + + while (true) { + Vector<int> v_offsetsv; + v_offsetsv.resize(base_size); + + int *v_offsets = v_offsetsv.ptrw(); + zeromem(v_offsets, sizeof(int) * base_size); + + int max_height = 0; + + for (int i = 0; i < item_count; i++) { + //best fit + DecalAtlas::SortItem &si = items[i]; + int best_idx = -1; + int best_height = 0x7FFFFFFF; + for (int j = 0; j <= base_size - si.size.width; j++) { + int height = 0; + for (int k = 0; k < si.size.width; k++) { + int h = v_offsets[k + j]; + if (h > height) { + height = h; + if (height > best_height) { + break; //already bad + } + } + } + + if (height < best_height) { + best_height = height; + best_idx = j; + } + } + + //update + for (int k = 0; k < si.size.width; k++) { + v_offsets[k + best_idx] = best_height + si.size.height; + } + + si.pos.x = best_idx; + si.pos.y = best_height; + + if (si.pos.y + si.size.height > max_height) { + max_height = si.pos.y + si.size.height; + } + } + + if (max_height <= base_size * 2) { + atlas_height = max_height; + break; //good ratio, break; + } + + base_size *= 2; + } + + decal_atlas.size.width = base_size * border; + decal_atlas.size.height = nearest_power_of_2_templated(atlas_height * border); + + for (int i = 0; i < item_count; i++) { + DecalAtlas::Texture *t = decal_atlas.textures.getptr(items[i].texture); + t->uv_rect.position = items[i].pos * border + Vector2i(border / 2, border / 2); + t->uv_rect.size = items[i].pixel_size; + + t->uv_rect.position /= Size2(decal_atlas.size); + t->uv_rect.size /= Size2(decal_atlas.size); + } + } else { + //use border as size, so it at least has enough mipmaps + decal_atlas.size.width = border; + decal_atlas.size.height = border; + } + + //blit textures + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = decal_atlas.size.width; + tformat.height = decal_atlas.size.height; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_2D; + tformat.mipmaps = decal_atlas.mipmaps; + tformat.shareable_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_UNORM); + tformat.shareable_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB); + + decal_atlas.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + RD::get_singleton()->texture_clear(decal_atlas.texture, Color(0, 0, 0, 0), 0, decal_atlas.mipmaps, 0, 1); + + { + //create the framebuffer + + Size2i s = decal_atlas.size; + + for (int i = 0; i < decal_atlas.mipmaps; i++) { + DecalAtlas::MipMap mm; + mm.texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), decal_atlas.texture, 0, i); + Vector<RID> fb; + fb.push_back(mm.texture); + mm.fb = RD::get_singleton()->framebuffer_create(fb); + mm.size = s; + decal_atlas.texture_mipmaps.push_back(mm); + + s.width = MAX(1, s.width >> 1); + s.height = MAX(1, s.height >> 1); + } + { + //create the SRGB variant + RD::TextureView rd_view; + rd_view.format_override = RD::DATA_FORMAT_R8G8B8A8_SRGB; + decal_atlas.texture_srgb = RD::get_singleton()->texture_create_shared(rd_view, decal_atlas.texture); + } + } + + RID prev_texture; + for (int i = 0; i < decal_atlas.texture_mipmaps.size(); i++) { + const DecalAtlas::MipMap &mm = decal_atlas.texture_mipmaps[i]; + + Color clear_color(0, 0, 0, 0); + + if (decal_atlas.textures.size()) { + if (i == 0) { + Vector<Color> cc; + cc.push_back(clear_color); + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(mm.fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, cc); + + const RID *K = nullptr; + while ((K = decal_atlas.textures.next(K))) { + DecalAtlas::Texture *t = decal_atlas.textures.getptr(*K); + Texture *src_tex = texture_owner.getornull(*K); + effects.copy_to_atlas_fb(src_tex->rd_texture, mm.fb, t->uv_rect, draw_list, false, t->panorama_to_dp_users > 0); + } + + RD::get_singleton()->draw_list_end(); + + prev_texture = mm.texture; + } else { + effects.copy_to_fb_rect(prev_texture, mm.fb, Rect2i(Point2i(), mm.size)); + prev_texture = mm.texture; + } + } else { + RD::get_singleton()->texture_clear(mm.texture, clear_color, 0, 1, 0, 1); + } + } +} + +int32_t RendererStorageRD::_global_variable_allocate(uint32_t p_elements) { + int32_t idx = 0; + while (idx + p_elements <= global_variables.buffer_size) { + if (global_variables.buffer_usage[idx].elements == 0) { + bool valid = true; + for (uint32_t i = 1; i < p_elements; i++) { + if (global_variables.buffer_usage[idx + i].elements > 0) { + valid = false; + idx += i + global_variables.buffer_usage[idx + i].elements; + break; + } + } + + if (!valid) { + continue; //if not valid, idx is in new position + } + + return idx; + } else { + idx += global_variables.buffer_usage[idx].elements; + } + } + + return -1; +} + +void RendererStorageRD::_global_variable_store_in_buffer(int32_t p_index, RS::GlobalVariableType p_type, const Variant &p_value) { + switch (p_type) { + case RS::GLOBAL_VAR_TYPE_BOOL: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + bool b = p_value; + bv.x = b ? 1.0 : 0.0; + bv.y = 0.0; + bv.z = 0.0; + bv.w = 0.0; + + } break; + case RS::GLOBAL_VAR_TYPE_BVEC2: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + uint32_t bvec = p_value; + bv.x = (bvec & 1) ? 1.0 : 0.0; + bv.y = (bvec & 2) ? 1.0 : 0.0; + bv.z = 0.0; + bv.w = 0.0; + } break; + case RS::GLOBAL_VAR_TYPE_BVEC3: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + uint32_t bvec = p_value; + bv.x = (bvec & 1) ? 1.0 : 0.0; + bv.y = (bvec & 2) ? 1.0 : 0.0; + bv.z = (bvec & 4) ? 1.0 : 0.0; + bv.w = 0.0; + } break; + case RS::GLOBAL_VAR_TYPE_BVEC4: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + uint32_t bvec = p_value; + bv.x = (bvec & 1) ? 1.0 : 0.0; + bv.y = (bvec & 2) ? 1.0 : 0.0; + bv.z = (bvec & 4) ? 1.0 : 0.0; + bv.w = (bvec & 8) ? 1.0 : 0.0; + } break; + case RS::GLOBAL_VAR_TYPE_INT: { + GlobalVariables::ValueInt &bv = *(GlobalVariables::ValueInt *)&global_variables.buffer_values[p_index]; + int32_t v = p_value; + bv.x = v; + bv.y = 0; + bv.z = 0; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_IVEC2: { + GlobalVariables::ValueInt &bv = *(GlobalVariables::ValueInt *)&global_variables.buffer_values[p_index]; + Vector2i v = p_value; + bv.x = v.x; + bv.y = v.y; + bv.z = 0; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_IVEC3: { + GlobalVariables::ValueInt &bv = *(GlobalVariables::ValueInt *)&global_variables.buffer_values[p_index]; + Vector3i v = p_value; + bv.x = v.x; + bv.y = v.y; + bv.z = v.z; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_IVEC4: { + GlobalVariables::ValueInt &bv = *(GlobalVariables::ValueInt *)&global_variables.buffer_values[p_index]; + Vector<int32_t> v = p_value; + bv.x = v.size() >= 1 ? v[0] : 0; + bv.y = v.size() >= 2 ? v[1] : 0; + bv.z = v.size() >= 3 ? v[2] : 0; + bv.w = v.size() >= 4 ? v[3] : 0; + } break; + case RS::GLOBAL_VAR_TYPE_RECT2I: { + GlobalVariables::ValueInt &bv = *(GlobalVariables::ValueInt *)&global_variables.buffer_values[p_index]; + Rect2i v = p_value; + bv.x = v.position.x; + bv.y = v.position.y; + bv.z = v.size.x; + bv.w = v.size.y; + } break; + case RS::GLOBAL_VAR_TYPE_UINT: { + GlobalVariables::ValueUInt &bv = *(GlobalVariables::ValueUInt *)&global_variables.buffer_values[p_index]; + uint32_t v = p_value; + bv.x = v; + bv.y = 0; + bv.z = 0; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_UVEC2: { + GlobalVariables::ValueUInt &bv = *(GlobalVariables::ValueUInt *)&global_variables.buffer_values[p_index]; + Vector2i v = p_value; + bv.x = v.x; + bv.y = v.y; + bv.z = 0; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_UVEC3: { + GlobalVariables::ValueUInt &bv = *(GlobalVariables::ValueUInt *)&global_variables.buffer_values[p_index]; + Vector3i v = p_value; + bv.x = v.x; + bv.y = v.y; + bv.z = v.z; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_UVEC4: { + GlobalVariables::ValueUInt &bv = *(GlobalVariables::ValueUInt *)&global_variables.buffer_values[p_index]; + Vector<int32_t> v = p_value; + bv.x = v.size() >= 1 ? v[0] : 0; + bv.y = v.size() >= 2 ? v[1] : 0; + bv.z = v.size() >= 3 ? v[2] : 0; + bv.w = v.size() >= 4 ? v[3] : 0; + } break; + case RS::GLOBAL_VAR_TYPE_FLOAT: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + float v = p_value; + bv.x = v; + bv.y = 0; + bv.z = 0; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_VEC2: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + Vector2 v = p_value; + bv.x = v.x; + bv.y = v.y; + bv.z = 0; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_VEC3: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + Vector3 v = p_value; + bv.x = v.x; + bv.y = v.y; + bv.z = v.z; + bv.w = 0; + } break; + case RS::GLOBAL_VAR_TYPE_VEC4: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + Plane v = p_value; + bv.x = v.normal.x; + bv.y = v.normal.y; + bv.z = v.normal.z; + bv.w = v.d; + } break; + case RS::GLOBAL_VAR_TYPE_COLOR: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + Color v = p_value; + bv.x = v.r; + bv.y = v.g; + bv.z = v.b; + bv.w = v.a; + + GlobalVariables::Value &bv_linear = global_variables.buffer_values[p_index + 1]; + v = v.to_linear(); + bv_linear.x = v.r; + bv_linear.y = v.g; + bv_linear.z = v.b; + bv_linear.w = v.a; + + } break; + case RS::GLOBAL_VAR_TYPE_RECT2: { + GlobalVariables::Value &bv = global_variables.buffer_values[p_index]; + Rect2 v = p_value; + bv.x = v.position.x; + bv.y = v.position.y; + bv.z = v.size.x; + bv.w = v.size.y; + } break; + case RS::GLOBAL_VAR_TYPE_MAT2: { + GlobalVariables::Value *bv = &global_variables.buffer_values[p_index]; + Vector<float> m2 = p_value; + if (m2.size() < 4) { + m2.resize(4); + } + bv[0].x = m2[0]; + bv[0].y = m2[1]; + bv[0].z = 0; + bv[0].w = 0; + + bv[1].x = m2[2]; + bv[1].y = m2[3]; + bv[1].z = 0; + bv[1].w = 0; + + } break; + case RS::GLOBAL_VAR_TYPE_MAT3: { + GlobalVariables::Value *bv = &global_variables.buffer_values[p_index]; + Basis v = p_value; + bv[0].x = v.elements[0][0]; + bv[0].y = v.elements[1][0]; + bv[0].z = v.elements[2][0]; + bv[0].w = 0; + + bv[1].x = v.elements[0][1]; + bv[1].y = v.elements[1][1]; + bv[1].z = v.elements[2][1]; + bv[1].w = 0; + + bv[2].x = v.elements[0][2]; + bv[2].y = v.elements[1][2]; + bv[2].z = v.elements[2][2]; + bv[2].w = 0; + + } break; + case RS::GLOBAL_VAR_TYPE_MAT4: { + GlobalVariables::Value *bv = &global_variables.buffer_values[p_index]; + + Vector<float> m2 = p_value; + if (m2.size() < 16) { + m2.resize(16); + } + + bv[0].x = m2[0]; + bv[0].y = m2[1]; + bv[0].z = m2[2]; + bv[0].w = m2[3]; + + bv[1].x = m2[4]; + bv[1].y = m2[5]; + bv[1].z = m2[6]; + bv[1].w = m2[7]; + + bv[2].x = m2[8]; + bv[2].y = m2[9]; + bv[2].z = m2[10]; + bv[2].w = m2[11]; + + bv[3].x = m2[12]; + bv[3].y = m2[13]; + bv[3].z = m2[14]; + bv[3].w = m2[15]; + + } break; + case RS::GLOBAL_VAR_TYPE_TRANSFORM_2D: { + GlobalVariables::Value *bv = &global_variables.buffer_values[p_index]; + Transform2D v = p_value; + bv[0].x = v.elements[0][0]; + bv[0].y = v.elements[0][1]; + bv[0].z = 0; + bv[0].w = 0; + + bv[1].x = v.elements[1][0]; + bv[1].y = v.elements[1][1]; + bv[1].z = 0; + bv[1].w = 0; + + bv[2].x = v.elements[2][0]; + bv[2].y = v.elements[2][1]; + bv[2].z = 1; + bv[2].w = 0; + + } break; + case RS::GLOBAL_VAR_TYPE_TRANSFORM: { + GlobalVariables::Value *bv = &global_variables.buffer_values[p_index]; + Transform v = p_value; + bv[0].x = v.basis.elements[0][0]; + bv[0].y = v.basis.elements[1][0]; + bv[0].z = v.basis.elements[2][0]; + bv[0].w = 0; + + bv[1].x = v.basis.elements[0][1]; + bv[1].y = v.basis.elements[1][1]; + bv[1].z = v.basis.elements[2][1]; + bv[1].w = 0; + + bv[2].x = v.basis.elements[0][2]; + bv[2].y = v.basis.elements[1][2]; + bv[2].z = v.basis.elements[2][2]; + bv[2].w = 0; + + bv[3].x = v.origin.x; + bv[3].y = v.origin.y; + bv[3].z = v.origin.z; + bv[3].w = 1; + + } break; + default: { + ERR_FAIL(); + } + } +} + +void RendererStorageRD::_global_variable_mark_buffer_dirty(int32_t p_index, int32_t p_elements) { + int32_t prev_chunk = -1; + + for (int32_t i = 0; i < p_elements; i++) { + int32_t chunk = (p_index + i) / GlobalVariables::BUFFER_DIRTY_REGION_SIZE; + if (chunk != prev_chunk) { + if (!global_variables.buffer_dirty_regions[chunk]) { + global_variables.buffer_dirty_regions[chunk] = true; + global_variables.buffer_dirty_region_count++; + } + } + + prev_chunk = chunk; + } +} + +void RendererStorageRD::global_variable_add(const StringName &p_name, RS::GlobalVariableType p_type, const Variant &p_value) { + ERR_FAIL_COND(global_variables.variables.has(p_name)); + GlobalVariables::Variable gv; + gv.type = p_type; + gv.value = p_value; + gv.buffer_index = -1; + + if (p_type >= RS::GLOBAL_VAR_TYPE_SAMPLER2D) { + //is texture + global_variables.must_update_texture_materials = true; //normally there are none + } else { + gv.buffer_elements = 1; + if (p_type == RS::GLOBAL_VAR_TYPE_COLOR || p_type == RS::GLOBAL_VAR_TYPE_MAT2) { + //color needs to elements to store srgb and linear + gv.buffer_elements = 2; + } + if (p_type == RS::GLOBAL_VAR_TYPE_MAT3 || p_type == RS::GLOBAL_VAR_TYPE_TRANSFORM_2D) { + //color needs to elements to store srgb and linear + gv.buffer_elements = 3; + } + if (p_type == RS::GLOBAL_VAR_TYPE_MAT4 || p_type == RS::GLOBAL_VAR_TYPE_TRANSFORM) { + //color needs to elements to store srgb and linear + gv.buffer_elements = 4; + } + + //is vector, allocate in buffer and update index + gv.buffer_index = _global_variable_allocate(gv.buffer_elements); + ERR_FAIL_COND_MSG(gv.buffer_index < 0, vformat("Failed allocating global variable '%s' out of buffer memory. Consider increasing it in the Project Settings.", String(p_name))); + global_variables.buffer_usage[gv.buffer_index].elements = gv.buffer_elements; + _global_variable_store_in_buffer(gv.buffer_index, gv.type, gv.value); + _global_variable_mark_buffer_dirty(gv.buffer_index, gv.buffer_elements); + + global_variables.must_update_buffer_materials = true; //normally there are none + } + + global_variables.variables[p_name] = gv; +} + +void RendererStorageRD::global_variable_remove(const StringName &p_name) { + if (!global_variables.variables.has(p_name)) { + return; + } + GlobalVariables::Variable &gv = global_variables.variables[p_name]; + + if (gv.buffer_index >= 0) { + global_variables.buffer_usage[gv.buffer_index].elements = 0; + global_variables.must_update_buffer_materials = true; + } else { + global_variables.must_update_texture_materials = true; + } + + global_variables.variables.erase(p_name); +} + +Vector<StringName> RendererStorageRD::global_variable_get_list() const { + if (!Engine::get_singleton()->is_editor_hint()) { + ERR_FAIL_V_MSG(Vector<StringName>(), "This function should never be used outside the editor, it can severely damage performance."); + } + + const StringName *K = nullptr; + Vector<StringName> names; + while ((K = global_variables.variables.next(K))) { + names.push_back(*K); + } + names.sort_custom<StringName::AlphCompare>(); + return names; +} + +void RendererStorageRD::global_variable_set(const StringName &p_name, const Variant &p_value) { + ERR_FAIL_COND(!global_variables.variables.has(p_name)); + GlobalVariables::Variable &gv = global_variables.variables[p_name]; + gv.value = p_value; + if (gv.override.get_type() == Variant::NIL) { + if (gv.buffer_index >= 0) { + //buffer + _global_variable_store_in_buffer(gv.buffer_index, gv.type, gv.value); + _global_variable_mark_buffer_dirty(gv.buffer_index, gv.buffer_elements); + } else { + //texture + for (Set<RID>::Element *E = gv.texture_materials.front(); E; E = E->next()) { + Material *material = material_owner.getornull(E->get()); + ERR_CONTINUE(!material); + _material_queue_update(material, false, true); + } + } + } +} + +void RendererStorageRD::global_variable_set_override(const StringName &p_name, const Variant &p_value) { + if (!global_variables.variables.has(p_name)) { + return; //variable may not exist + } + GlobalVariables::Variable &gv = global_variables.variables[p_name]; + + gv.override = p_value; + + if (gv.buffer_index >= 0) { + //buffer + if (gv.override.get_type() == Variant::NIL) { + _global_variable_store_in_buffer(gv.buffer_index, gv.type, gv.value); + } else { + _global_variable_store_in_buffer(gv.buffer_index, gv.type, gv.override); + } + + _global_variable_mark_buffer_dirty(gv.buffer_index, gv.buffer_elements); + } else { + //texture + //texture + for (Set<RID>::Element *E = gv.texture_materials.front(); E; E = E->next()) { + Material *material = material_owner.getornull(E->get()); + ERR_CONTINUE(!material); + _material_queue_update(material, false, true); + } + } +} + +Variant RendererStorageRD::global_variable_get(const StringName &p_name) const { + if (!Engine::get_singleton()->is_editor_hint()) { + ERR_FAIL_V_MSG(Variant(), "This function should never be used outside the editor, it can severely damage performance."); + } + + if (!global_variables.variables.has(p_name)) { + return Variant(); + } + + return global_variables.variables[p_name].value; +} + +RS::GlobalVariableType RendererStorageRD::global_variable_get_type_internal(const StringName &p_name) const { + if (!global_variables.variables.has(p_name)) { + return RS::GLOBAL_VAR_TYPE_MAX; + } + + return global_variables.variables[p_name].type; +} + +RS::GlobalVariableType RendererStorageRD::global_variable_get_type(const StringName &p_name) const { + if (!Engine::get_singleton()->is_editor_hint()) { + ERR_FAIL_V_MSG(RS::GLOBAL_VAR_TYPE_MAX, "This function should never be used outside the editor, it can severely damage performance."); + } + + return global_variable_get_type_internal(p_name); +} + +void RendererStorageRD::global_variables_load_settings(bool p_load_textures) { + List<PropertyInfo> settings; + ProjectSettings::get_singleton()->get_property_list(&settings); + + for (List<PropertyInfo>::Element *E = settings.front(); E; E = E->next()) { + if (E->get().name.begins_with("shader_globals/")) { + StringName name = E->get().name.get_slice("/", 1); + Dictionary d = ProjectSettings::get_singleton()->get(E->get().name); + + ERR_CONTINUE(!d.has("type")); + ERR_CONTINUE(!d.has("value")); + + String type = d["type"]; + + static const char *global_var_type_names[RS::GLOBAL_VAR_TYPE_MAX] = { + "bool", + "bvec2", + "bvec3", + "bvec4", + "int", + "ivec2", + "ivec3", + "ivec4", + "rect2i", + "uint", + "uvec2", + "uvec3", + "uvec4", + "float", + "vec2", + "vec3", + "vec4", + "color", + "rect2", + "mat2", + "mat3", + "mat4", + "transform_2d", + "transform", + "sampler2D", + "sampler2DArray", + "sampler3D", + "samplerCube", + }; + + RS::GlobalVariableType gvtype = RS::GLOBAL_VAR_TYPE_MAX; + + for (int i = 0; i < RS::GLOBAL_VAR_TYPE_MAX; i++) { + if (global_var_type_names[i] == type) { + gvtype = RS::GlobalVariableType(i); + break; + } + } + + ERR_CONTINUE(gvtype == RS::GLOBAL_VAR_TYPE_MAX); //type invalid + + Variant value = d["value"]; + + if (gvtype >= RS::GLOBAL_VAR_TYPE_SAMPLER2D) { + //textire + if (!p_load_textures) { + value = RID(); + continue; + } + + String path = value; + RES resource = ResourceLoader::load(path); + ERR_CONTINUE(resource.is_null()); + value = resource; + } + + if (global_variables.variables.has(name)) { + //has it, update it + global_variable_set(name, value); + } else { + global_variable_add(name, gvtype, value); + } + } + } +} + +void RendererStorageRD::global_variables_clear() { + global_variables.variables.clear(); //not right but for now enough +} + +RID RendererStorageRD::global_variables_get_storage_buffer() const { + return global_variables.buffer; +} + +int32_t RendererStorageRD::global_variables_instance_allocate(RID p_instance) { + ERR_FAIL_COND_V(global_variables.instance_buffer_pos.has(p_instance), -1); + int32_t pos = _global_variable_allocate(ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES); + global_variables.instance_buffer_pos[p_instance] = pos; //save anyway + ERR_FAIL_COND_V_MSG(pos < 0, -1, "Too many instances using shader instance variables. Increase buffer size in Project Settings."); + global_variables.buffer_usage[pos].elements = ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES; + return pos; +} + +void RendererStorageRD::global_variables_instance_free(RID p_instance) { + ERR_FAIL_COND(!global_variables.instance_buffer_pos.has(p_instance)); + int32_t pos = global_variables.instance_buffer_pos[p_instance]; + if (pos >= 0) { + global_variables.buffer_usage[pos].elements = 0; + } + global_variables.instance_buffer_pos.erase(p_instance); +} + +void RendererStorageRD::global_variables_instance_update(RID p_instance, int p_index, const Variant &p_value) { + if (!global_variables.instance_buffer_pos.has(p_instance)) { + return; //just not allocated, ignore + } + int32_t pos = global_variables.instance_buffer_pos[p_instance]; + + if (pos < 0) { + return; //again, not allocated, ignore + } + ERR_FAIL_INDEX(p_index, ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES); + ERR_FAIL_COND_MSG(p_value.get_type() > Variant::COLOR, "Unsupported variant type for instance parameter: " + Variant::get_type_name(p_value.get_type())); //anything greater not supported + + ShaderLanguage::DataType datatype_from_value[Variant::COLOR + 1] = { + ShaderLanguage::TYPE_MAX, //nil + ShaderLanguage::TYPE_BOOL, //bool + ShaderLanguage::TYPE_INT, //int + ShaderLanguage::TYPE_FLOAT, //float + ShaderLanguage::TYPE_MAX, //string + ShaderLanguage::TYPE_VEC2, //vec2 + ShaderLanguage::TYPE_IVEC2, //vec2i + ShaderLanguage::TYPE_VEC4, //rect2 + ShaderLanguage::TYPE_IVEC4, //rect2i + ShaderLanguage::TYPE_VEC3, // vec3 + ShaderLanguage::TYPE_IVEC3, //vec3i + ShaderLanguage::TYPE_MAX, //xform2d not supported here + ShaderLanguage::TYPE_VEC4, //plane + ShaderLanguage::TYPE_VEC4, //quat + ShaderLanguage::TYPE_MAX, //aabb not supported here + ShaderLanguage::TYPE_MAX, //basis not supported here + ShaderLanguage::TYPE_MAX, //xform not supported here + ShaderLanguage::TYPE_VEC4 //color + }; + + ShaderLanguage::DataType datatype = datatype_from_value[p_value.get_type()]; + + ERR_FAIL_COND_MSG(datatype == ShaderLanguage::TYPE_MAX, "Unsupported variant type for instance parameter: " + Variant::get_type_name(p_value.get_type())); //anything greater not supported + + pos += p_index; + + _fill_std140_variant_ubo_value(datatype, p_value, (uint8_t *)&global_variables.buffer_values[pos], true); //instances always use linear color in this renderer + _global_variable_mark_buffer_dirty(pos, 1); +} + +void RendererStorageRD::_update_global_variables() { + if (global_variables.buffer_dirty_region_count > 0) { + uint32_t total_regions = global_variables.buffer_size / GlobalVariables::BUFFER_DIRTY_REGION_SIZE; + if (total_regions / global_variables.buffer_dirty_region_count <= 4) { + // 25% of regions dirty, just update all buffer + RD::get_singleton()->buffer_update(global_variables.buffer, 0, sizeof(GlobalVariables::Value) * global_variables.buffer_size, global_variables.buffer_values); + zeromem(global_variables.buffer_dirty_regions, sizeof(bool) * total_regions); + } else { + uint32_t region_byte_size = sizeof(GlobalVariables::Value) * GlobalVariables::BUFFER_DIRTY_REGION_SIZE; + + for (uint32_t i = 0; i < total_regions; i++) { + if (global_variables.buffer_dirty_regions[i]) { + RD::get_singleton()->buffer_update(global_variables.buffer, i * region_byte_size, region_byte_size, global_variables.buffer_values); + + global_variables.buffer_dirty_regions[i] = false; + } + } + } + + global_variables.buffer_dirty_region_count = 0; + } + + if (global_variables.must_update_buffer_materials) { + // only happens in the case of a buffer variable added or removed, + // so not often. + for (List<RID>::Element *E = global_variables.materials_using_buffer.front(); E; E = E->next()) { + Material *material = material_owner.getornull(E->get()); + ERR_CONTINUE(!material); //wtf + + _material_queue_update(material, true, false); + } + + global_variables.must_update_buffer_materials = false; + } + + if (global_variables.must_update_texture_materials) { + // only happens in the case of a buffer variable added or removed, + // so not often. + for (List<RID>::Element *E = global_variables.materials_using_texture.front(); E; E = E->next()) { + Material *material = material_owner.getornull(E->get()); + ERR_CONTINUE(!material); //wtf + + _material_queue_update(material, false, true); + print_line("update material texture?"); + } + + global_variables.must_update_texture_materials = false; + } +} + +void RendererStorageRD::update_dirty_resources() { + _update_global_variables(); //must do before materials, so it can queue them for update + _update_queued_materials(); + _update_dirty_multimeshes(); + _update_dirty_skeletons(); + _update_decal_atlas(); +} + +bool RendererStorageRD::has_os_feature(const String &p_feature) const { + if (p_feature == "rgtc" && RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC5_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT)) { + return true; + } + + if (p_feature == "s3tc" && RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC1_RGB_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT)) { + return true; + } + + if (p_feature == "bptc" && RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_BC7_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT)) { + return true; + } + + if ((p_feature == "etc" || p_feature == "etc2") && RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, RD::TEXTURE_USAGE_SAMPLING_BIT)) { + return true; + } + + if (p_feature == "pvrtc" && RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, RD::TEXTURE_USAGE_SAMPLING_BIT)) { + return true; + } + + return false; +} + +bool RendererStorageRD::free(RID p_rid) { + if (texture_owner.owns(p_rid)) { + Texture *t = texture_owner.getornull(p_rid); + + ERR_FAIL_COND_V(t->is_render_target, false); + + if (RD::get_singleton()->texture_is_valid(t->rd_texture_srgb)) { + //erase this first, as it's a dependency of the one below + RD::get_singleton()->free(t->rd_texture_srgb); + } + if (RD::get_singleton()->texture_is_valid(t->rd_texture)) { + RD::get_singleton()->free(t->rd_texture); + } + + if (t->is_proxy && t->proxy_to.is_valid()) { + Texture *proxy_to = texture_owner.getornull(t->proxy_to); + if (proxy_to) { + proxy_to->proxies.erase(p_rid); + } + } + + if (decal_atlas.textures.has(p_rid)) { + decal_atlas.textures.erase(p_rid); + //there is not much a point of making it dirty, just let it be. + } + + for (int i = 0; i < t->proxies.size(); i++) { + Texture *p = texture_owner.getornull(t->proxies[i]); + ERR_CONTINUE(!p); + p->proxy_to = RID(); + p->rd_texture = RID(); + p->rd_texture_srgb = RID(); + } + + if (t->canvas_texture) { + memdelete(t->canvas_texture); + } + texture_owner.free(p_rid); + + } else if (canvas_texture_owner.owns(p_rid)) { + CanvasTexture *ct = canvas_texture_owner.getornull(p_rid); + memdelete(ct); + canvas_texture_owner.free(p_rid); + } else if (shader_owner.owns(p_rid)) { + Shader *shader = shader_owner.getornull(p_rid); + //make material unreference this + while (shader->owners.size()) { + material_set_shader(shader->owners.front()->get()->self, RID()); + } + //clear data if exists + if (shader->data) { + memdelete(shader->data); + } + shader_owner.free(p_rid); + + } else if (material_owner.owns(p_rid)) { + Material *material = material_owner.getornull(p_rid); + if (material->update_requested) { + _update_queued_materials(); + } + material_set_shader(p_rid, RID()); //clean up shader + material->dependency.deleted_notify(p_rid); + + material_owner.free(p_rid); + } else if (mesh_owner.owns(p_rid)) { + mesh_clear(p_rid); + mesh_set_shadow_mesh(p_rid, RID()); + Mesh *mesh = mesh_owner.getornull(p_rid); + mesh->dependency.deleted_notify(p_rid); + if (mesh->instances.size()) { + ERR_PRINT("deleting mesh with active instances"); + } + if (mesh->shadow_owners.size()) { + for (Set<Mesh *>::Element *E = mesh->shadow_owners.front(); E; E = E->next()) { + Mesh *shadow_owner = E->get(); + shadow_owner->shadow_mesh = RID(); + shadow_owner->dependency.changed_notify(DEPENDENCY_CHANGED_MESH); + } + } + mesh_owner.free(p_rid); + } else if (mesh_instance_owner.owns(p_rid)) { + MeshInstance *mi = mesh_instance_owner.getornull(p_rid); + _mesh_instance_clear(mi); + mi->mesh->instances.erase(mi->I); + mi->I = nullptr; + + mesh_instance_owner.free(p_rid); + memdelete(mi); + + } else if (multimesh_owner.owns(p_rid)) { + _update_dirty_multimeshes(); + multimesh_allocate_data(p_rid, 0, RS::MULTIMESH_TRANSFORM_2D); + MultiMesh *multimesh = multimesh_owner.getornull(p_rid); + multimesh->dependency.deleted_notify(p_rid); + multimesh_owner.free(p_rid); + } else if (skeleton_owner.owns(p_rid)) { + _update_dirty_skeletons(); + skeleton_allocate_data(p_rid, 0); + Skeleton *skeleton = skeleton_owner.getornull(p_rid); + skeleton->dependency.deleted_notify(p_rid); + skeleton_owner.free(p_rid); + } else if (reflection_probe_owner.owns(p_rid)) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_rid); + reflection_probe->dependency.deleted_notify(p_rid); + reflection_probe_owner.free(p_rid); + } else if (decal_owner.owns(p_rid)) { + Decal *decal = decal_owner.getornull(p_rid); + for (int i = 0; i < RS::DECAL_TEXTURE_MAX; i++) { + if (decal->textures[i].is_valid() && texture_owner.owns(decal->textures[i])) { + texture_remove_from_decal_atlas(decal->textures[i]); + } + } + decal->dependency.deleted_notify(p_rid); + decal_owner.free(p_rid); + } else if (gi_probe_owner.owns(p_rid)) { + gi_probe_allocate_data(p_rid, Transform(), AABB(), Vector3i(), Vector<uint8_t>(), Vector<uint8_t>(), Vector<uint8_t>(), Vector<int>()); //deallocate + GIProbe *gi_probe = gi_probe_owner.getornull(p_rid); + gi_probe->dependency.deleted_notify(p_rid); + gi_probe_owner.free(p_rid); + } else if (lightmap_owner.owns(p_rid)) { + lightmap_set_textures(p_rid, RID(), false); + Lightmap *lightmap = lightmap_owner.getornull(p_rid); + lightmap->dependency.deleted_notify(p_rid); + lightmap_owner.free(p_rid); + + } else if (light_owner.owns(p_rid)) { + light_set_projector(p_rid, RID()); //clear projector + // delete the texture + Light *light = light_owner.getornull(p_rid); + light->dependency.deleted_notify(p_rid); + light_owner.free(p_rid); + + } else if (particles_owner.owns(p_rid)) { + Particles *particles = particles_owner.getornull(p_rid); + _particles_free_data(particles); + particles->dependency.deleted_notify(p_rid); + particles_owner.free(p_rid); + } else if (particles_collision_owner.owns(p_rid)) { + ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_rid); + + if (particles_collision->heightfield_texture.is_valid()) { + RD::get_singleton()->free(particles_collision->heightfield_texture); + } + particles_collision->dependency.deleted_notify(p_rid); + particles_collision_owner.free(p_rid); + } else if (particles_collision_instance_owner.owns(p_rid)) { + particles_collision_instance_owner.free(p_rid); + } else if (render_target_owner.owns(p_rid)) { + RenderTarget *rt = render_target_owner.getornull(p_rid); + + _clear_render_target(rt); + + if (rt->texture.is_valid()) { + Texture *tex = texture_owner.getornull(rt->texture); + tex->is_render_target = false; + free(rt->texture); + } + + render_target_owner.free(p_rid); + } else { + return false; + } + + return true; +} + +EffectsRD *RendererStorageRD::get_effects() { + return &effects; +} + +void RendererStorageRD::capture_timestamps_begin() { + RD::get_singleton()->capture_timestamp("Frame Begin"); +} + +void RendererStorageRD::capture_timestamp(const String &p_name) { + RD::get_singleton()->capture_timestamp(p_name); +} + +uint32_t RendererStorageRD::get_captured_timestamps_count() const { + return RD::get_singleton()->get_captured_timestamps_count(); +} + +uint64_t RendererStorageRD::get_captured_timestamps_frame() const { + return RD::get_singleton()->get_captured_timestamps_frame(); +} + +uint64_t RendererStorageRD::get_captured_timestamp_gpu_time(uint32_t p_index) const { + return RD::get_singleton()->get_captured_timestamp_gpu_time(p_index); +} + +uint64_t RendererStorageRD::get_captured_timestamp_cpu_time(uint32_t p_index) const { + return RD::get_singleton()->get_captured_timestamp_cpu_time(p_index); +} + +String RendererStorageRD::get_captured_timestamp_name(uint32_t p_index) const { + return RD::get_singleton()->get_captured_timestamp_name(p_index); +} + +RendererStorageRD *RendererStorageRD::base_singleton = nullptr; + +RendererStorageRD::RendererStorageRD() { + base_singleton = this; + + for (int i = 0; i < SHADER_TYPE_MAX; i++) { + shader_data_request_func[i] = nullptr; + } + + static_assert(sizeof(GlobalVariables::Value) == 16); + + global_variables.buffer_size = GLOBAL_GET("rendering/limits/global_shader_variables/buffer_size"); + global_variables.buffer_size = MAX(4096, global_variables.buffer_size); + global_variables.buffer_values = memnew_arr(GlobalVariables::Value, global_variables.buffer_size); + zeromem(global_variables.buffer_values, sizeof(GlobalVariables::Value) * global_variables.buffer_size); + global_variables.buffer_usage = memnew_arr(GlobalVariables::ValueUsage, global_variables.buffer_size); + global_variables.buffer_dirty_regions = memnew_arr(bool, global_variables.buffer_size / GlobalVariables::BUFFER_DIRTY_REGION_SIZE); + zeromem(global_variables.buffer_dirty_regions, sizeof(bool) * global_variables.buffer_size / GlobalVariables::BUFFER_DIRTY_REGION_SIZE); + global_variables.buffer = RD::get_singleton()->storage_buffer_create(sizeof(GlobalVariables::Value) * global_variables.buffer_size); + + material_update_list = nullptr; + { //create default textures + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = 4; + tformat.height = 4; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_2D; + + Vector<uint8_t> pv; + pv.resize(16 * 4); + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 255); + pv.set(i * 4 + 1, 255); + pv.set(i * 4 + 2, 255); + pv.set(i * 4 + 3, 255); + } + + { + Vector<Vector<uint8_t>> vpv; + vpv.push_back(pv); + default_rd_textures[DEFAULT_RD_TEXTURE_WHITE] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 0); + pv.set(i * 4 + 1, 0); + pv.set(i * 4 + 2, 0); + pv.set(i * 4 + 3, 255); + } + + { + Vector<Vector<uint8_t>> vpv; + vpv.push_back(pv); + default_rd_textures[DEFAULT_RD_TEXTURE_BLACK] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + + //take the chance and initialize decal atlas to something + decal_atlas.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + decal_atlas.texture_srgb = decal_atlas.texture; + } + + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 128); + pv.set(i * 4 + 1, 128); + pv.set(i * 4 + 2, 255); + pv.set(i * 4 + 3, 255); + } + + { + Vector<Vector<uint8_t>> vpv; + vpv.push_back(pv); + default_rd_textures[DEFAULT_RD_TEXTURE_NORMAL] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 255); + pv.set(i * 4 + 1, 128); + pv.set(i * 4 + 2, 255); + pv.set(i * 4 + 3, 255); + } + + { + Vector<Vector<uint8_t>> vpv; + vpv.push_back(pv); + default_rd_textures[DEFAULT_RD_TEXTURE_ANISO] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 0); + pv.set(i * 4 + 1, 0); + pv.set(i * 4 + 2, 0); + pv.set(i * 4 + 3, 0); + } + + default_rd_textures[DEFAULT_RD_TEXTURE_MULTIMESH_BUFFER] = RD::get_singleton()->texture_buffer_create(16, RD::DATA_FORMAT_R8G8B8A8_UNORM, pv); + + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 0); + pv.set(i * 4 + 1, 0); + pv.set(i * 4 + 2, 0); + pv.set(i * 4 + 3, 0); + } + + { + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UINT; + Vector<Vector<uint8_t>> vpv; + vpv.push_back(pv); + default_rd_textures[DEFAULT_RD_TEXTURE_2D_UINT] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + } + + { //create default cubemap + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = 4; + tformat.height = 4; + tformat.array_layers = 6; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_CUBE_ARRAY; + + Vector<uint8_t> pv; + pv.resize(16 * 4); + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 0); + pv.set(i * 4 + 1, 0); + pv.set(i * 4 + 2, 0); + pv.set(i * 4 + 3, 0); + } + + { + Vector<Vector<uint8_t>> vpv; + for (int i = 0; i < 6; i++) { + vpv.push_back(pv); + } + default_rd_textures[DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + } + + { //create default cubemap array + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = 4; + tformat.height = 4; + tformat.array_layers = 6; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_CUBE; + + Vector<uint8_t> pv; + pv.resize(16 * 4); + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 0); + pv.set(i * 4 + 1, 0); + pv.set(i * 4 + 2, 0); + pv.set(i * 4 + 3, 0); + } + + { + Vector<Vector<uint8_t>> vpv; + for (int i = 0; i < 6; i++) { + vpv.push_back(pv); + } + default_rd_textures[DEFAULT_RD_TEXTURE_CUBEMAP_BLACK] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + } + + { //create default cubemap white array + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = 4; + tformat.height = 4; + tformat.array_layers = 6; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_CUBE; + + Vector<uint8_t> pv; + pv.resize(16 * 4); + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 255); + pv.set(i * 4 + 1, 255); + pv.set(i * 4 + 2, 255); + pv.set(i * 4 + 3, 255); + } + + { + Vector<Vector<uint8_t>> vpv; + for (int i = 0; i < 6; i++) { + vpv.push_back(pv); + } + default_rd_textures[DEFAULT_RD_TEXTURE_CUBEMAP_WHITE] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + } + + { //create default 3D + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = 4; + tformat.height = 4; + tformat.depth = 4; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_3D; + + Vector<uint8_t> pv; + pv.resize(64 * 4); + for (int i = 0; i < 64; i++) { + pv.set(i * 4 + 0, 0); + pv.set(i * 4 + 1, 0); + pv.set(i * 4 + 2, 0); + pv.set(i * 4 + 3, 0); + } + + { + Vector<Vector<uint8_t>> vpv; + vpv.push_back(pv); + default_rd_textures[DEFAULT_RD_TEXTURE_3D_WHITE] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + } + + { //create default array + + RD::TextureFormat tformat; + tformat.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tformat.width = 4; + tformat.height = 4; + tformat.array_layers = 1; + tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + tformat.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + + Vector<uint8_t> pv; + pv.resize(16 * 4); + for (int i = 0; i < 16; i++) { + pv.set(i * 4 + 0, 255); + pv.set(i * 4 + 1, 255); + pv.set(i * 4 + 2, 255); + pv.set(i * 4 + 3, 255); + } + + { + Vector<Vector<uint8_t>> vpv; + vpv.push_back(pv); + default_rd_textures[DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); + } + } + + //default samplers + for (int i = 1; i < RS::CANVAS_ITEM_TEXTURE_FILTER_MAX; i++) { + for (int j = 1; j < RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX; j++) { + RD::SamplerState sampler_state; + switch (i) { + case RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST: { + sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST; + sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST; + sampler_state.max_lod = 0; + } break; + case RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR: { + sampler_state.mag_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.max_lod = 0; + } break; + case RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS: { + sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST; + sampler_state.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.mip_filter = RD::SAMPLER_FILTER_LINEAR; + } break; + case RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS: { + sampler_state.mag_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.mip_filter = RD::SAMPLER_FILTER_LINEAR; + + } break; + case RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC: { + sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST; + sampler_state.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.mip_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.use_anisotropy = true; + sampler_state.anisotropy_max = 1 << int(GLOBAL_GET("rendering/textures/default_filters/anisotropic_filtering_level")); + } break; + case RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC: { + sampler_state.mag_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.min_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.mip_filter = RD::SAMPLER_FILTER_LINEAR; + sampler_state.use_anisotropy = true; + sampler_state.anisotropy_max = 1 << int(GLOBAL_GET("rendering/textures/default_filters/anisotropic_filtering_level")); + + } break; + default: { + } + } + switch (j) { + case RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED: { + sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + sampler_state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + + } break; + case RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED: { + sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_REPEAT; + sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_REPEAT; + sampler_state.repeat_w = RD::SAMPLER_REPEAT_MODE_REPEAT; + } break; + case RS::CANVAS_ITEM_TEXTURE_REPEAT_MIRROR: { + sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler_state.repeat_w = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + } break; + default: { + } + } + + default_rd_samplers[i][j] = RD::get_singleton()->sampler_create(sampler_state); + } + } + + //default rd buffers + { + Vector<uint8_t> buffer; + { + buffer.resize(sizeof(float) * 3); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 0.0; + fptr[1] = 0.0; + fptr[2] = 0.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_VERTEX] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + + { //normal + buffer.resize(sizeof(float) * 3); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 1.0; + fptr[1] = 0.0; + fptr[2] = 0.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_NORMAL] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + + { //tangent + buffer.resize(sizeof(float) * 4); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 1.0; + fptr[1] = 0.0; + fptr[2] = 0.0; + fptr[3] = 0.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_TANGENT] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + + { //color + buffer.resize(sizeof(float) * 4); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 1.0; + fptr[1] = 1.0; + fptr[2] = 1.0; + fptr[3] = 1.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_COLOR] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + + { //tex uv 1 + buffer.resize(sizeof(float) * 2); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 0.0; + fptr[1] = 0.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_TEX_UV] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + { //tex uv 2 + buffer.resize(sizeof(float) * 2); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 0.0; + fptr[1] = 0.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_TEX_UV2] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + + for (int i = 0; i < RS::ARRAY_CUSTOM_COUNT; i++) { + buffer.resize(sizeof(float) * 4); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 0.0; + fptr[1] = 0.0; + fptr[2] = 0.0; + fptr[3] = 0.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_CUSTOM0 + i] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + + { //bones + buffer.resize(sizeof(uint32_t) * 4); + { + uint8_t *w = buffer.ptrw(); + uint32_t *fptr = (uint32_t *)w; + fptr[0] = 0; + fptr[1] = 0; + fptr[2] = 0; + fptr[3] = 0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_BONES] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + + { //weights + buffer.resize(sizeof(float) * 4); + { + uint8_t *w = buffer.ptrw(); + float *fptr = (float *)w; + fptr[0] = 0.0; + fptr[1] = 0.0; + fptr[2] = 0.0; + fptr[3] = 0.0; + } + mesh_default_rd_buffers[DEFAULT_RD_BUFFER_WEIGHTS] = RD::get_singleton()->vertex_buffer_create(buffer.size(), buffer); + } + } + + { + Vector<String> sdf_versions; + sdf_versions.push_back(""); //one only + giprobe_sdf_shader.initialize(sdf_versions); + giprobe_sdf_shader_version = giprobe_sdf_shader.version_create(); + giprobe_sdf_shader.version_set_compute_code(giprobe_sdf_shader_version, "", "", "", Vector<String>()); + giprobe_sdf_shader_version_shader = giprobe_sdf_shader.version_get_shader(giprobe_sdf_shader_version, 0); + giprobe_sdf_shader_pipeline = RD::get_singleton()->compute_pipeline_create(giprobe_sdf_shader_version_shader); + } + + using_lightmap_array = true; // high end + if (using_lightmap_array) { + uint32_t textures_per_stage = RD::get_singleton()->limit_get(RD::LIMIT_MAX_TEXTURES_PER_SHADER_STAGE); + + if (textures_per_stage <= 256) { + lightmap_textures.resize(32); + } else { + lightmap_textures.resize(1024); + } + + for (int i = 0; i < lightmap_textures.size(); i++) { + lightmap_textures.write[i] = default_rd_textures[DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE]; + } + } + + lightmap_probe_capture_update_speed = GLOBAL_GET("rendering/lightmapping/probe_capture/update_speed"); + + /* Particles */ + + { + // Initialize particles + Vector<String> particles_modes; + particles_modes.push_back(""); + particles_shader.shader.initialize(particles_modes, String()); + } + shader_set_data_request_function(RendererStorageRD::SHADER_TYPE_PARTICLES, _create_particles_shader_funcs); + material_set_data_request_function(RendererStorageRD::SHADER_TYPE_PARTICLES, _create_particles_material_funcs); + + { + ShaderCompilerRD::DefaultIdentifierActions actions; + + actions.renames["COLOR"] = "PARTICLE.color"; + actions.renames["VELOCITY"] = "PARTICLE.velocity"; + //actions.renames["MASS"] = "mass"; ? + actions.renames["ACTIVE"] = "PARTICLE.is_active"; + actions.renames["RESTART"] = "restart"; + actions.renames["CUSTOM"] = "PARTICLE.custom"; + actions.renames["TRANSFORM"] = "PARTICLE.xform"; + actions.renames["TIME"] = "FRAME.time"; + actions.renames["LIFETIME"] = "params.lifetime"; + actions.renames["DELTA"] = "local_delta"; + actions.renames["NUMBER"] = "particle"; + actions.renames["INDEX"] = "index"; + //actions.renames["GRAVITY"] = "current_gravity"; + actions.renames["EMISSION_TRANSFORM"] = "FRAME.emission_transform"; + actions.renames["RANDOM_SEED"] = "FRAME.random_seed"; + actions.renames["FLAG_EMIT_POSITION"] = "EMISSION_FLAG_HAS_POSITION"; + actions.renames["FLAG_EMIT_ROT_SCALE"] = "EMISSION_FLAG_HAS_ROTATION_SCALE"; + actions.renames["FLAG_EMIT_VELOCITY"] = "EMISSION_FLAG_HAS_VELOCITY"; + actions.renames["FLAG_EMIT_COLOR"] = "EMISSION_FLAG_HAS_COLOR"; + actions.renames["FLAG_EMIT_CUSTOM"] = "EMISSION_FLAG_HAS_CUSTOM"; + actions.renames["RESTART_POSITION"] = "restart_position"; + actions.renames["RESTART_ROT_SCALE"] = "restart_rotation_scale"; + actions.renames["RESTART_VELOCITY"] = "restart_velocity"; + actions.renames["RESTART_COLOR"] = "restart_color"; + actions.renames["RESTART_CUSTOM"] = "restart_custom"; + actions.renames["emit_subparticle"] = "emit_subparticle"; + actions.renames["COLLIDED"] = "collided"; + actions.renames["COLLISION_NORMAL"] = "collision_normal"; + actions.renames["COLLISION_DEPTH"] = "collision_depth"; + actions.renames["ATTRACTOR_FORCE"] = "attractor_force"; + + actions.render_mode_defines["disable_force"] = "#define DISABLE_FORCE\n"; + actions.render_mode_defines["disable_velocity"] = "#define DISABLE_VELOCITY\n"; + actions.render_mode_defines["keep_data"] = "#define ENABLE_KEEP_DATA\n"; + actions.render_mode_defines["collision_use_scale"] = "#define USE_COLLISON_SCALE\n"; + + actions.sampler_array_name = "material_samplers"; + actions.base_texture_binding_index = 1; + actions.texture_layout_set = 3; + actions.base_uniform_string = "material."; + actions.base_varying_index = 10; + + actions.default_filter = ShaderLanguage::FILTER_LINEAR_MIPMAP; + actions.default_repeat = ShaderLanguage::REPEAT_ENABLE; + actions.global_buffer_array_variable = "global_variables.data"; + + particles_shader.compiler.initialize(actions); + } + + { + // default material and shader for particles shader + particles_shader.default_shader = shader_allocate(); + shader_initialize(particles_shader.default_shader); + shader_set_code(particles_shader.default_shader, "shader_type particles; void compute() { COLOR = vec4(1.0); } \n"); + particles_shader.default_material = material_allocate(); + material_initialize(particles_shader.default_material); + material_set_shader(particles_shader.default_material, particles_shader.default_shader); + + ParticlesMaterialData *md = (ParticlesMaterialData *)material_get_data(particles_shader.default_material, RendererStorageRD::SHADER_TYPE_PARTICLES); + particles_shader.default_shader_rd = particles_shader.shader.version_get_shader(md->shader_data->version, 0); + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.ids.resize(12); + RID *ids_ptr = u.ids.ptrw(); + ids_ptr[0] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[1] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[2] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[3] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[4] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[5] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[6] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[7] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[8] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[9] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[10] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[11] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(global_variables_get_storage_buffer()); + uniforms.push_back(u); + } + + particles_shader.base_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.default_shader_rd, 0); + } + + default_rd_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4); + + { + Vector<String> copy_modes; + copy_modes.push_back("\n#define MODE_FILL_INSTANCES\n"); + copy_modes.push_back("\n#define MODE_FILL_SORT_BUFFER\n#define USE_SORT_BUFFER\n"); + copy_modes.push_back("\n#define MODE_FILL_INSTANCES\n#define USE_SORT_BUFFER\n"); + + particles_shader.copy_shader.initialize(copy_modes); + + particles_shader.copy_shader_version = particles_shader.copy_shader.version_create(); + + for (int i = 0; i < ParticlesShader::COPY_MODE_MAX; i++) { + particles_shader.copy_pipelines[i] = RD::get_singleton()->compute_pipeline_create(particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, i)); + } + } + + { + Vector<String> sdf_modes; + sdf_modes.push_back("\n#define MODE_LOAD\n"); + sdf_modes.push_back("\n#define MODE_LOAD_SHRINK\n"); + sdf_modes.push_back("\n#define MODE_PROCESS\n"); + sdf_modes.push_back("\n#define MODE_PROCESS_OPTIMIZED\n"); + sdf_modes.push_back("\n#define MODE_STORE\n"); + sdf_modes.push_back("\n#define MODE_STORE_SHRINK\n"); + + rt_sdf.shader.initialize(sdf_modes); + + rt_sdf.shader_version = rt_sdf.shader.version_create(); + + for (int i = 0; i < RenderTargetSDF::SHADER_MAX; i++) { + rt_sdf.pipelines[i] = RD::get_singleton()->compute_pipeline_create(rt_sdf.shader.version_get_shader(rt_sdf.shader_version, i)); + } + } + { + Vector<String> skeleton_modes; + skeleton_modes.push_back("\n#define MODE_2D\n"); + skeleton_modes.push_back(""); + + skeleton_shader.shader.initialize(skeleton_modes); + skeleton_shader.version = skeleton_shader.shader.version_create(); + for (int i = 0; i < SkeletonShader::SHADER_MODE_MAX; i++) { + skeleton_shader.version_shader[i] = skeleton_shader.shader.version_get_shader(skeleton_shader.version, i); + skeleton_shader.pipeline[i] = RD::get_singleton()->compute_pipeline_create(skeleton_shader.version_shader[i]); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(default_rd_storage_buffer); + uniforms.push_back(u); + } + skeleton_shader.default_skeleton_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SKELETON); + } + } +} + +RendererStorageRD::~RendererStorageRD() { + memdelete_arr(global_variables.buffer_values); + memdelete_arr(global_variables.buffer_usage); + memdelete_arr(global_variables.buffer_dirty_regions); + RD::get_singleton()->free(global_variables.buffer); + + //def textures + for (int i = 0; i < DEFAULT_RD_TEXTURE_MAX; i++) { + RD::get_singleton()->free(default_rd_textures[i]); + } + + //def samplers + for (int i = 1; i < RS::CANVAS_ITEM_TEXTURE_FILTER_MAX; i++) { + for (int j = 1; j < RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX; j++) { + RD::get_singleton()->free(default_rd_samplers[i][j]); + } + } + + //def buffers + for (int i = 0; i < DEFAULT_RD_BUFFER_MAX; i++) { + RD::get_singleton()->free(mesh_default_rd_buffers[i]); + } + + giprobe_sdf_shader.version_free(giprobe_sdf_shader_version); + particles_shader.copy_shader.version_free(particles_shader.copy_shader_version); + rt_sdf.shader.version_free(rt_sdf.shader_version); + + skeleton_shader.shader.version_free(skeleton_shader.version); + + RenderingServer::get_singleton()->free(particles_shader.default_material); + RenderingServer::get_singleton()->free(particles_shader.default_shader); + + RD::get_singleton()->free(default_rd_storage_buffer); + + if (decal_atlas.textures.size()) { + ERR_PRINT("Decal Atlas: " + itos(decal_atlas.textures.size()) + " textures were not removed from the atlas."); + } + + if (decal_atlas.texture.is_valid()) { + RD::get_singleton()->free(decal_atlas.texture); + } +} diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h new file mode 100644 index 0000000000..68256dc155 --- /dev/null +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ -0,0 +1,2288 @@ +/*************************************************************************/ +/* renderer_storage_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef RENDERING_SERVER_STORAGE_RD_H +#define RENDERING_SERVER_STORAGE_RD_H + +#include "core/templates/list.h" +#include "core/templates/local_vector.h" +#include "core/templates/rid_owner.h" +#include "servers/rendering/renderer_compositor.h" +#include "servers/rendering/renderer_rd/effects_rd.h" +#include "servers/rendering/renderer_rd/shader_compiler_rd.h" +#include "servers/rendering/renderer_rd/shaders/canvas_sdf.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/particles.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/particles_copy.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/skeleton.glsl.gen.h" +#include "servers/rendering/renderer_scene_render.h" +#include "servers/rendering/rendering_device.h" +class RendererStorageRD : public RendererStorage { +public: + static _FORCE_INLINE_ void store_transform(const Transform &p_mtx, float *p_array) { + p_array[0] = p_mtx.basis.elements[0][0]; + p_array[1] = p_mtx.basis.elements[1][0]; + p_array[2] = p_mtx.basis.elements[2][0]; + p_array[3] = 0; + p_array[4] = p_mtx.basis.elements[0][1]; + p_array[5] = p_mtx.basis.elements[1][1]; + p_array[6] = p_mtx.basis.elements[2][1]; + p_array[7] = 0; + p_array[8] = p_mtx.basis.elements[0][2]; + p_array[9] = p_mtx.basis.elements[1][2]; + p_array[10] = p_mtx.basis.elements[2][2]; + p_array[11] = 0; + p_array[12] = p_mtx.origin.x; + p_array[13] = p_mtx.origin.y; + p_array[14] = p_mtx.origin.z; + p_array[15] = 1; + } + + static _FORCE_INLINE_ void store_basis_3x4(const Basis &p_mtx, float *p_array) { + p_array[0] = p_mtx.elements[0][0]; + p_array[1] = p_mtx.elements[1][0]; + p_array[2] = p_mtx.elements[2][0]; + p_array[3] = 0; + p_array[4] = p_mtx.elements[0][1]; + p_array[5] = p_mtx.elements[1][1]; + p_array[6] = p_mtx.elements[2][1]; + p_array[7] = 0; + p_array[8] = p_mtx.elements[0][2]; + p_array[9] = p_mtx.elements[1][2]; + p_array[10] = p_mtx.elements[2][2]; + p_array[11] = 0; + } + + static _FORCE_INLINE_ void store_transform_3x3(const Basis &p_mtx, float *p_array) { + p_array[0] = p_mtx.elements[0][0]; + p_array[1] = p_mtx.elements[1][0]; + p_array[2] = p_mtx.elements[2][0]; + p_array[3] = 0; + p_array[4] = p_mtx.elements[0][1]; + p_array[5] = p_mtx.elements[1][1]; + p_array[6] = p_mtx.elements[2][1]; + p_array[7] = 0; + p_array[8] = p_mtx.elements[0][2]; + p_array[9] = p_mtx.elements[1][2]; + p_array[10] = p_mtx.elements[2][2]; + p_array[11] = 0; + } + + static _FORCE_INLINE_ void store_transform_transposed_3x4(const Transform &p_mtx, float *p_array) { + p_array[0] = p_mtx.basis.elements[0][0]; + p_array[1] = p_mtx.basis.elements[0][1]; + p_array[2] = p_mtx.basis.elements[0][2]; + p_array[3] = p_mtx.origin.x; + p_array[4] = p_mtx.basis.elements[1][0]; + p_array[5] = p_mtx.basis.elements[1][1]; + p_array[6] = p_mtx.basis.elements[1][2]; + p_array[7] = p_mtx.origin.y; + p_array[8] = p_mtx.basis.elements[2][0]; + p_array[9] = p_mtx.basis.elements[2][1]; + p_array[10] = p_mtx.basis.elements[2][2]; + p_array[11] = p_mtx.origin.z; + } + + static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + p_array[i * 4 + j] = p_mtx.matrix[i][j]; + } + } + } + + static _FORCE_INLINE_ void store_soft_shadow_kernel(const float *p_kernel, float *p_array) { + for (int i = 0; i < 128; i++) { + p_array[i] = p_kernel[i]; + } + } + + enum ShaderType { + SHADER_TYPE_2D, + SHADER_TYPE_3D, + SHADER_TYPE_PARTICLES, + SHADER_TYPE_SKY, + SHADER_TYPE_MAX + }; + + struct ShaderData { + virtual void set_code(const String &p_Code) = 0; + virtual void set_default_texture_param(const StringName &p_name, RID p_texture) = 0; + virtual void get_param_list(List<PropertyInfo> *p_param_list) const = 0; + + virtual void get_instance_param_list(List<InstanceShaderParam> *p_param_list) const = 0; + virtual bool is_param_texture(const StringName &p_param) const = 0; + virtual bool is_animated() const = 0; + virtual bool casts_shadows() const = 0; + virtual Variant get_default_parameter(const StringName &p_parameter) const = 0; + virtual RS::ShaderNativeSourceCode get_native_source_code() const { return RS::ShaderNativeSourceCode(); } + + virtual ~ShaderData() {} + }; + + typedef ShaderData *(*ShaderDataRequestFunction)(); + + struct MaterialData { + void update_uniform_buffer(const Map<StringName, ShaderLanguage::ShaderNode::Uniform> &p_uniforms, const uint32_t *p_uniform_offsets, const Map<StringName, Variant> &p_parameters, uint8_t *p_buffer, uint32_t p_buffer_size, bool p_use_linear_color); + void update_textures(const Map<StringName, Variant> &p_parameters, const Map<StringName, RID> &p_default_textures, const Vector<ShaderCompilerRD::GeneratedCode::Texture> &p_texture_uniforms, RID *p_textures, bool p_use_linear_color); + + virtual void set_render_priority(int p_priority) = 0; + virtual void set_next_pass(RID p_pass) = 0; + virtual void update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) = 0; + virtual ~MaterialData(); + + private: + friend class RendererStorageRD; + RID self; + List<RID>::Element *global_buffer_E = nullptr; + List<RID>::Element *global_texture_E = nullptr; + uint64_t global_textures_pass = 0; + Map<StringName, uint64_t> used_global_textures; + }; + typedef MaterialData *(*MaterialDataRequestFunction)(ShaderData *); + + enum DefaultRDTexture { + DEFAULT_RD_TEXTURE_WHITE, + DEFAULT_RD_TEXTURE_BLACK, + DEFAULT_RD_TEXTURE_NORMAL, + DEFAULT_RD_TEXTURE_ANISO, + DEFAULT_RD_TEXTURE_MULTIMESH_BUFFER, + DEFAULT_RD_TEXTURE_CUBEMAP_BLACK, + DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK, + DEFAULT_RD_TEXTURE_CUBEMAP_WHITE, + DEFAULT_RD_TEXTURE_3D_WHITE, + DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE, + DEFAULT_RD_TEXTURE_2D_UINT, + DEFAULT_RD_TEXTURE_MAX + }; + + enum DefaultRDBuffer { + DEFAULT_RD_BUFFER_VERTEX, + DEFAULT_RD_BUFFER_NORMAL, + DEFAULT_RD_BUFFER_TANGENT, + DEFAULT_RD_BUFFER_COLOR, + DEFAULT_RD_BUFFER_TEX_UV, + DEFAULT_RD_BUFFER_TEX_UV2, + DEFAULT_RD_BUFFER_CUSTOM0, + DEFAULT_RD_BUFFER_CUSTOM1, + DEFAULT_RD_BUFFER_CUSTOM2, + DEFAULT_RD_BUFFER_CUSTOM3, + DEFAULT_RD_BUFFER_BONES, + DEFAULT_RD_BUFFER_WEIGHTS, + DEFAULT_RD_BUFFER_MAX, + }; + +private: + /* CANVAS TEXTURE API (2D) */ + + struct CanvasTexture { + RID diffuse; + RID normal_map; + RID specular; + Color specular_color = Color(1, 1, 1, 1); + float shininess = 1.0; + + RS::CanvasItemTextureFilter texture_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT; + RS::CanvasItemTextureRepeat texture_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT; + RID uniform_sets[RS::CANVAS_ITEM_TEXTURE_FILTER_MAX][RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX]; + + Size2i size_cache = Size2i(1, 1); + bool use_normal_cache = false; + bool use_specular_cache = false; + bool cleared_cache = true; + void clear_sets(); + ~CanvasTexture(); + }; + + RID_PtrOwner<CanvasTexture, true> canvas_texture_owner; + + /* TEXTURE API */ + struct Texture { + enum Type { + TYPE_2D, + TYPE_LAYERED, + TYPE_3D + }; + + Type type; + RS::TextureLayeredType layered_type = RS::TEXTURE_LAYERED_2D_ARRAY; + + RenderingDevice::TextureType rd_type; + RID rd_texture; + RID rd_texture_srgb; + RenderingDevice::DataFormat rd_format; + RenderingDevice::DataFormat rd_format_srgb; + + RD::TextureView rd_view; + + Image::Format format; + Image::Format validated_format; + + int width; + int height; + int depth; + int layers; + int mipmaps; + + int height_2d; + int width_2d; + + struct BufferSlice3D { + Size2i size; + uint32_t offset = 0; + uint32_t buffer_size = 0; + }; + Vector<BufferSlice3D> buffer_slices_3d; + uint32_t buffer_size_3d = 0; + + bool is_render_target; + bool is_proxy; + + Ref<Image> image_cache_2d; + String path; + + RID proxy_to; + Vector<RID> proxies; + Set<RID> lightmap_users; + + RS::TextureDetectCallback detect_3d_callback = nullptr; + void *detect_3d_callback_ud = nullptr; + + RS::TextureDetectCallback detect_normal_callback = nullptr; + void *detect_normal_callback_ud = nullptr; + + RS::TextureDetectRoughnessCallback detect_roughness_callback = nullptr; + void *detect_roughness_callback_ud = nullptr; + + CanvasTexture *canvas_texture = nullptr; + }; + + struct TextureToRDFormat { + RD::DataFormat format; + RD::DataFormat format_srgb; + RD::TextureSwizzle swizzle_r; + RD::TextureSwizzle swizzle_g; + RD::TextureSwizzle swizzle_b; + RD::TextureSwizzle swizzle_a; + TextureToRDFormat() { + format = RD::DATA_FORMAT_MAX; + format_srgb = RD::DATA_FORMAT_MAX; + swizzle_r = RD::TEXTURE_SWIZZLE_R; + swizzle_g = RD::TEXTURE_SWIZZLE_G; + swizzle_b = RD::TEXTURE_SWIZZLE_B; + swizzle_a = RD::TEXTURE_SWIZZLE_A; + } + }; + + //textures can be created from threads, so this RID_Owner is thread safe + mutable RID_Owner<Texture, true> texture_owner; + + Ref<Image> _validate_texture_format(const Ref<Image> &p_image, TextureToRDFormat &r_format); + + RID default_rd_textures[DEFAULT_RD_TEXTURE_MAX]; + RID default_rd_samplers[RS::CANVAS_ITEM_TEXTURE_FILTER_MAX][RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX]; + RID default_rd_storage_buffer; + + /* DECAL ATLAS */ + + struct DecalAtlas { + struct Texture { + int panorama_to_dp_users; + int users; + Rect2 uv_rect; + }; + + struct SortItem { + RID texture; + Size2i pixel_size; + Size2i size; + Point2i pos; + + bool operator<(const SortItem &p_item) const { + //sort larger to smaller + if (size.height == p_item.size.height) { + return size.width > p_item.size.width; + } else { + return size.height > p_item.size.height; + } + } + }; + + HashMap<RID, Texture> textures; + bool dirty = true; + int mipmaps = 5; + + RID texture; + RID texture_srgb; + struct MipMap { + RID fb; + RID texture; + Size2i size; + }; + Vector<MipMap> texture_mipmaps; + + Size2i size; + + } decal_atlas; + + void _update_decal_atlas(); + + /* SHADER */ + + struct Material; + + struct Shader { + ShaderData *data; + String code; + ShaderType type; + Map<StringName, RID> default_texture_parameter; + Set<Material *> owners; + }; + + ShaderDataRequestFunction shader_data_request_func[SHADER_TYPE_MAX]; + mutable RID_Owner<Shader, true> shader_owner; + + /* Material */ + + struct Material { + RID self; + MaterialData *data; + Shader *shader; + //shortcut to shader data and type + ShaderType shader_type; + uint32_t shader_id = 0; + bool update_requested; + bool uniform_dirty; + bool texture_dirty; + Material *update_next; + Map<StringName, Variant> params; + int32_t priority; + RID next_pass; + Dependency dependency; + }; + + MaterialDataRequestFunction material_data_request_func[SHADER_TYPE_MAX]; + mutable RID_Owner<Material, true> material_owner; + + Material *material_update_list; + void _material_queue_update(Material *material, bool p_uniform, bool p_texture); + void _update_queued_materials(); + + /* Mesh */ + + struct MeshInstance; + + struct Mesh { + struct Surface { + RS::PrimitiveType primitive = RS::PRIMITIVE_POINTS; + uint32_t format = 0; + + RID vertex_buffer; + RID attribute_buffer; + RID skin_buffer; + uint32_t vertex_count = 0; + uint32_t vertex_buffer_size = 0; + uint32_t skin_buffer_size = 0; + + // A different pipeline needs to be allocated + // depending on the inputs available in the + // material. + // There are never that many geometry/material + // combinations, so a simple array is the most + // cache-efficient structure. + + struct Version { + uint32_t input_mask = 0; + RD::VertexFormatID vertex_format = 0; + RID vertex_array; + }; + + SpinLock version_lock; //needed to access versions + Version *versions = nullptr; //allocated on demand + uint32_t version_count = 0; + + RID index_buffer; + RID index_array; + uint32_t index_count = 0; + + struct LOD { + float edge_length = 0.0; + RID index_buffer; + RID index_array; + }; + + LOD *lods = nullptr; + uint32_t lod_count = 0; + + AABB aabb; + + Vector<AABB> bone_aabbs; + + RID blend_shape_buffer; + + RID material; + + uint32_t render_index = 0; + uint64_t render_pass = 0; + + uint32_t multimesh_render_index = 0; + uint64_t multimesh_render_pass = 0; + + uint32_t particles_render_index = 0; + uint64_t particles_render_pass = 0; + + RID uniform_set; + }; + + uint32_t blend_shape_count = 0; + RS::BlendShapeMode blend_shape_mode = RS::BLEND_SHAPE_MODE_NORMALIZED; + + Surface **surfaces = nullptr; + uint32_t surface_count = 0; + + Vector<AABB> bone_aabbs; + + bool has_bone_weights = false; + + AABB aabb; + AABB custom_aabb; + + Vector<RID> material_cache; + + List<MeshInstance *> instances; + + RID shadow_mesh; + Set<Mesh *> shadow_owners; + + Dependency dependency; + }; + + mutable RID_Owner<Mesh, true> mesh_owner; + + struct MeshInstance { + Mesh *mesh; + RID skeleton; + struct Surface { + RID vertex_buffer; + RID uniform_set; + + Mesh::Surface::Version *versions = nullptr; //allocated on demand + uint32_t version_count = 0; + }; + LocalVector<Surface> surfaces; + LocalVector<float> blend_weights; + + RID blend_weights_buffer; + List<MeshInstance *>::Element *I = nullptr; //used to erase itself + uint64_t skeleton_version = 0; + bool dirty = false; + bool weights_dirty = false; + SelfList<MeshInstance> weight_update_list; + SelfList<MeshInstance> array_update_list; + MeshInstance() : + weight_update_list(this), array_update_list(this) {} + }; + + void _mesh_instance_clear(MeshInstance *mi); + void _mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface); + + mutable RID_PtrOwner<MeshInstance> mesh_instance_owner; + + SelfList<MeshInstance>::List dirty_mesh_instance_weights; + SelfList<MeshInstance>::List dirty_mesh_instance_arrays; + + struct SkeletonShader { + struct PushConstant { + uint32_t has_normal; + uint32_t has_tangent; + uint32_t has_skeleton; + uint32_t has_blend_shape; + + uint32_t vertex_count; + uint32_t vertex_stride; + uint32_t skin_stride; + uint32_t skin_weight_offset; + + uint32_t blend_shape_count; + uint32_t normalized_blend_shapes; + uint32_t pad0; + uint32_t pad1; + }; + + enum { + UNIFORM_SET_INSTANCE = 0, + UNIFORM_SET_SURFACE = 1, + UNIFORM_SET_SKELETON = 2, + }; + enum { + SHADER_MODE_2D, + SHADER_MODE_3D, + SHADER_MODE_MAX + }; + + SkeletonShaderRD shader; + RID version; + RID version_shader[SHADER_MODE_MAX]; + RID pipeline[SHADER_MODE_MAX]; + + RID default_skeleton_uniform_set; + } skeleton_shader; + + void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis = nullptr); + + RID mesh_default_rd_buffers[DEFAULT_RD_BUFFER_MAX]; + + /* MultiMesh */ + struct MultiMesh { + RID mesh; + int instances = 0; + RS::MultimeshTransformFormat xform_format = RS::MULTIMESH_TRANSFORM_3D; + bool uses_colors = false; + bool uses_custom_data = false; + int visible_instances = -1; + AABB aabb; + bool aabb_dirty = false; + bool buffer_set = false; + uint32_t stride_cache = 0; + uint32_t color_offset_cache = 0; + uint32_t custom_data_offset_cache = 0; + + Vector<float> data_cache; //used if individual setting is used + bool *data_cache_dirty_regions = nullptr; + uint32_t data_cache_used_dirty_regions = 0; + + RID buffer; //storage buffer + RID uniform_set_3d; + + bool dirty = false; + MultiMesh *dirty_list = nullptr; + + Dependency dependency; + }; + + mutable RID_Owner<MultiMesh, true> multimesh_owner; + + MultiMesh *multimesh_dirty_list = nullptr; + + _FORCE_INLINE_ void _multimesh_make_local(MultiMesh *multimesh) const; + _FORCE_INLINE_ void _multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb); + _FORCE_INLINE_ void _multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, bool p_aabb); + _FORCE_INLINE_ void _multimesh_re_create_aabb(MultiMesh *multimesh, const float *p_data, int p_instances); + void _update_dirty_multimeshes(); + + /* PARTICLES */ + + struct ParticleData { + float xform[16]; + float velocity[3]; + uint32_t active; + float color[4]; + float custom[3]; + float lifetime; + uint32_t pad[3]; + }; + + struct ParticlesFrameParams { + enum { + MAX_ATTRACTORS = 32, + MAX_COLLIDERS = 32, + MAX_3D_TEXTURES = 7 + }; + + enum AttractorType { + ATTRACTOR_TYPE_SPHERE, + ATTRACTOR_TYPE_BOX, + ATTRACTOR_TYPE_VECTOR_FIELD, + }; + + struct Attractor { + float transform[16]; + float extents[3]; //exents or radius + uint32_t type; + + uint32_t texture_index; //texture index for vector field + float strength; + float attenuation; + float directionality; + }; + + enum CollisionType { + COLLISION_TYPE_SPHERE, + COLLISION_TYPE_BOX, + COLLISION_TYPE_SDF, + COLLISION_TYPE_HEIGHT_FIELD + }; + + struct Collider { + float transform[16]; + float extents[3]; //exents or radius + uint32_t type; + + uint32_t texture_index; //texture index for vector field + float scale; + uint32_t pad[2]; + }; + + uint32_t emitting; + float system_phase; + float prev_system_phase; + uint32_t cycle; + + float explosiveness; + float randomness; + float time; + float delta; + + uint32_t random_seed; + uint32_t attractor_count; + uint32_t collider_count; + float particle_size; + + float emission_transform[16]; + + Attractor attractors[MAX_ATTRACTORS]; + Collider colliders[MAX_COLLIDERS]; + }; + + struct ParticleEmissionBufferData { + }; + + struct ParticleEmissionBuffer { + struct Data { + float xform[16]; + float velocity[3]; + uint32_t flags; + float color[4]; + float custom[4]; + }; + + int32_t particle_count; + int32_t particle_max; + uint32_t pad1; + uint32_t pad2; + Data data[1]; //its 2020 and empty arrays are still non standard in C++ + }; + + struct Particles { + bool inactive; + float inactive_time; + bool emitting; + bool one_shot; + int amount; + float lifetime; + float pre_process_time; + float explosiveness; + float randomness; + bool restart_request; + AABB custom_aabb; + bool use_local_coords; + RID process_material; + + RS::ParticlesDrawOrder draw_order; + + Vector<RID> draw_passes; + + RID particle_buffer; + RID particle_instance_buffer; + RID frame_params_buffer; + + RID particles_material_uniform_set; + RID particles_copy_uniform_set; + RID particles_transforms_buffer_uniform_set; + RID collision_textures_uniform_set; + + RID collision_3d_textures[ParticlesFrameParams::MAX_3D_TEXTURES]; + uint32_t collision_3d_textures_used = 0; + RID collision_heightmap_texture; + + RID particles_sort_buffer; + RID particles_sort_uniform_set; + + bool dirty = false; + Particles *update_list = nullptr; + + RID sub_emitter; + + float phase; + float prev_phase; + uint64_t prev_ticks; + uint32_t random_seed; + + uint32_t cycle_number; + + float speed_scale; + + int fixed_fps; + bool fractional_delta; + float frame_remainder; + float collision_base_size; + + bool clear; + + bool force_sub_emit = false; + + Transform emission_transform; + + Vector<uint8_t> emission_buffer_data; + + ParticleEmissionBuffer *emission_buffer = nullptr; + RID emission_storage_buffer; + + Set<RID> collisions; + + Particles() : + inactive(true), + inactive_time(0.0), + emitting(false), + one_shot(false), + amount(0), + lifetime(1.0), + pre_process_time(0.0), + explosiveness(0.0), + randomness(0.0), + restart_request(false), + custom_aabb(AABB(Vector3(-4, -4, -4), Vector3(8, 8, 8))), + use_local_coords(true), + draw_order(RS::PARTICLES_DRAW_ORDER_INDEX), + prev_ticks(0), + random_seed(0), + cycle_number(0), + speed_scale(1.0), + fixed_fps(0), + fractional_delta(false), + frame_remainder(0), + collision_base_size(0.01), + clear(true) { + } + + Dependency dependency; + + ParticlesFrameParams frame_params; + }; + + void _particles_process(Particles *p_particles, float p_delta); + void _particles_allocate_emission_buffer(Particles *particles); + void _particles_free_data(Particles *particles); + + struct ParticlesShader { + struct PushConstant { + float lifetime; + uint32_t clear; + uint32_t total_particles; + uint32_t trail_size; + + uint32_t use_fractional_delta; + uint32_t sub_emitter_mode; + uint32_t can_emit; + uint32_t pad; + }; + + ParticlesShaderRD shader; + ShaderCompilerRD compiler; + + RID default_shader; + RID default_material; + RID default_shader_rd; + + RID base_uniform_set; + + struct CopyPushConstant { + float sort_direction[3]; + uint32_t total_particles; + }; + + enum { + COPY_MODE_FILL_INSTANCES, + COPY_MODE_FILL_SORT_BUFFER, + COPY_MODE_FILL_INSTANCES_WITH_SORT_BUFFER, + COPY_MODE_MAX, + }; + + ParticlesCopyShaderRD copy_shader; + RID copy_shader_version; + RID copy_pipelines[COPY_MODE_MAX]; + + } particles_shader; + + Particles *particle_update_list = nullptr; + + struct ParticlesShaderData : public ShaderData { + bool valid; + RID version; + + //PipelineCacheRD pipelines[SKY_VERSION_MAX]; + Map<StringName, ShaderLanguage::ShaderNode::Uniform> uniforms; + Vector<ShaderCompilerRD::GeneratedCode::Texture> texture_uniforms; + + Vector<uint32_t> ubo_offsets; + uint32_t ubo_size; + + String path; + String code; + Map<StringName, RID> default_texture_params; + + RID pipeline; + + bool uses_time; + + virtual void set_code(const String &p_Code); + virtual void set_default_texture_param(const StringName &p_name, RID p_texture); + virtual void get_param_list(List<PropertyInfo> *p_param_list) const; + virtual void get_instance_param_list(List<RendererStorage::InstanceShaderParam> *p_param_list) const; + virtual bool is_param_texture(const StringName &p_param) const; + virtual bool is_animated() const; + virtual bool casts_shadows() const; + virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + + ParticlesShaderData(); + virtual ~ParticlesShaderData(); + }; + + ShaderData *_create_particles_shader_func(); + static RendererStorageRD::ShaderData *_create_particles_shader_funcs() { + return base_singleton->_create_particles_shader_func(); + } + + struct ParticlesMaterialData : public MaterialData { + uint64_t last_frame; + ParticlesShaderData *shader_data; + RID uniform_buffer; + RID uniform_set; + Vector<RID> texture_cache; + Vector<uint8_t> ubo_data; + bool uniform_set_updated; + + virtual void set_render_priority(int p_priority) {} + virtual void set_next_pass(RID p_pass) {} + virtual void update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty); + virtual ~ParticlesMaterialData(); + }; + + MaterialData *_create_particles_material_func(ParticlesShaderData *p_shader); + static RendererStorageRD::MaterialData *_create_particles_material_funcs(ShaderData *p_shader) { + return base_singleton->_create_particles_material_func(static_cast<ParticlesShaderData *>(p_shader)); + } + + void update_particles(); + + mutable RID_Owner<Particles, true> particles_owner; + + /* Particles Collision */ + + struct ParticlesCollision { + RS::ParticlesCollisionType type = RS::PARTICLES_COLLISION_TYPE_SPHERE_ATTRACT; + uint32_t cull_mask = 0xFFFFFFFF; + float radius = 1.0; + Vector3 extents = Vector3(1, 1, 1); + float attractor_strength = 1.0; + float attractor_attenuation = 1.0; + float attractor_directionality = 0.0; + RID field_texture; + RID heightfield_texture; + RID heightfield_fb; + Size2i heightfield_fb_size; + + RS::ParticlesCollisionHeightfieldResolution heightfield_resolution = RS::PARTICLES_COLLISION_HEIGHTFIELD_RESOLUTION_1024; + + Dependency dependency; + }; + + mutable RID_Owner<ParticlesCollision, true> particles_collision_owner; + + struct ParticlesCollisionInstance { + RID collision; + Transform transform; + bool active = false; + }; + + mutable RID_Owner<ParticlesCollisionInstance> particles_collision_instance_owner; + + /* Skeleton */ + + struct Skeleton { + bool use_2d = false; + int size = 0; + Vector<float> data; + RID buffer; + + bool dirty = false; + Skeleton *dirty_list = nullptr; + Transform2D base_transform_2d; + + RID uniform_set_3d; + RID uniform_set_mi; + + uint64_t version = 1; + + Dependency dependency; + }; + + mutable RID_Owner<Skeleton, true> skeleton_owner; + + _FORCE_INLINE_ void _skeleton_make_dirty(Skeleton *skeleton); + + Skeleton *skeleton_dirty_list = nullptr; + + void _update_dirty_skeletons(); + + /* LIGHT */ + + struct Light { + RS::LightType type; + float param[RS::LIGHT_PARAM_MAX]; + Color color = Color(1, 1, 1, 1); + Color shadow_color; + RID projector; + bool shadow = false; + bool negative = false; + bool reverse_cull = false; + RS::LightBakeMode bake_mode = RS::LIGHT_BAKE_DYNAMIC; + uint32_t max_sdfgi_cascade = 2; + uint32_t cull_mask = 0xFFFFFFFF; + RS::LightOmniShadowMode omni_shadow_mode = RS::LIGHT_OMNI_SHADOW_DUAL_PARABOLOID; + RS::LightDirectionalShadowMode directional_shadow_mode = RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL; + RS::LightDirectionalShadowDepthRangeMode directional_range_mode = RS::LIGHT_DIRECTIONAL_SHADOW_DEPTH_RANGE_STABLE; + bool directional_blend_splits = false; + bool directional_sky_only = false; + uint64_t version = 0; + + Dependency dependency; + }; + + mutable RID_Owner<Light, true> light_owner; + + /* REFLECTION PROBE */ + + struct ReflectionProbe { + RS::ReflectionProbeUpdateMode update_mode = RS::REFLECTION_PROBE_UPDATE_ONCE; + int resolution = 256; + float intensity = 1.0; + RS::ReflectionProbeAmbientMode ambient_mode = RS::REFLECTION_PROBE_AMBIENT_ENVIRONMENT; + Color ambient_color; + float ambient_color_energy = 1.0; + float max_distance = 0; + Vector3 extents = Vector3(1, 1, 1); + Vector3 origin_offset; + bool interior = false; + bool box_projection = false; + bool enable_shadows = false; + uint32_t cull_mask = (1 << 20) - 1; + float lod_threshold = 0.01; + + Dependency dependency; + }; + + mutable RID_Owner<ReflectionProbe, true> reflection_probe_owner; + + /* DECAL */ + + struct Decal { + Vector3 extents = Vector3(1, 1, 1); + RID textures[RS::DECAL_TEXTURE_MAX]; + float emission_energy = 1.0; + float albedo_mix = 1.0; + Color modulate = Color(1, 1, 1, 1); + uint32_t cull_mask = (1 << 20) - 1; + float upper_fade = 0.3; + float lower_fade = 0.3; + bool distance_fade = false; + float distance_fade_begin = 10; + float distance_fade_length = 1; + float normal_fade = 0.0; + + Dependency dependency; + }; + + mutable RID_Owner<Decal, true> decal_owner; + + /* GI PROBE */ + + struct GIProbe { + RID octree_buffer; + RID data_buffer; + RID sdf_texture; + + uint32_t octree_buffer_size = 0; + uint32_t data_buffer_size = 0; + + Vector<int> level_counts; + + int cell_count = 0; + + Transform to_cell_xform; + AABB bounds; + Vector3i octree_size; + + float dynamic_range = 4.0; + float energy = 1.0; + float ao = 0.0; + float ao_size = 0.5; + float bias = 1.4; + float normal_bias = 0.0; + float propagation = 0.7; + bool interior = false; + bool use_two_bounces = false; + + float anisotropy_strength = 0.5; + + uint32_t version = 1; + uint32_t data_version = 1; + + Dependency dependency; + }; + + GiprobeSdfShaderRD giprobe_sdf_shader; + RID giprobe_sdf_shader_version; + RID giprobe_sdf_shader_version_shader; + RID giprobe_sdf_shader_pipeline; + + mutable RID_Owner<GIProbe, true> gi_probe_owner; + + /* REFLECTION PROBE */ + + struct Lightmap { + RID light_texture; + bool uses_spherical_harmonics = false; + bool interior = false; + AABB bounds = AABB(Vector3(), Vector3(1, 1, 1)); + int32_t array_index = -1; //unassigned + PackedVector3Array points; + PackedColorArray point_sh; + PackedInt32Array tetrahedra; + PackedInt32Array bsp_tree; + + struct BSP { + static const int32_t EMPTY_LEAF = INT32_MIN; + float plane[4]; + int32_t over = EMPTY_LEAF, under = EMPTY_LEAF; + }; + + Dependency dependency; + }; + + bool using_lightmap_array; //high end uses this + /* for high end */ + + Vector<RID> lightmap_textures; + + uint64_t lightmap_array_version = 0; + + mutable RID_Owner<Lightmap, true> lightmap_owner; + + float lightmap_probe_capture_update_speed = 4; + + /* RENDER TARGET */ + + struct RenderTarget { + Size2i size; + RID framebuffer; + RID color; + + //used for retrieving from CPU + RD::DataFormat color_format = RD::DATA_FORMAT_R4G4_UNORM_PACK8; + RD::DataFormat color_format_srgb = RD::DATA_FORMAT_R4G4_UNORM_PACK8; + Image::Format image_format = Image::FORMAT_L8; + + bool flags[RENDER_TARGET_FLAG_MAX]; + + RID backbuffer; //used for effects + RID backbuffer_fb; + RID backbuffer_mipmap0; + + struct BackbufferMipmap { + RID mipmap; + RID mipmap_copy; + }; + + Vector<BackbufferMipmap> backbuffer_mipmaps; + + RID framebuffer_uniform_set; + RID backbuffer_uniform_set; + + RID sdf_buffer_write; + RID sdf_buffer_write_fb; + RID sdf_buffer_process[2]; + RID sdf_buffer_read; + RID sdf_buffer_process_uniform_sets[2]; + RS::ViewportSDFOversize sdf_oversize = RS::VIEWPORT_SDF_OVERSIZE_120_PERCENT; + RS::ViewportSDFScale sdf_scale = RS::VIEWPORT_SDF_SCALE_50_PERCENT; + Size2i process_size; + + //texture generated for this owner (nor RD). + RID texture; + bool was_used; + + //clear request + bool clear_requested; + Color clear_color; + }; + + mutable RID_Owner<RenderTarget> render_target_owner; + + void _clear_render_target(RenderTarget *rt); + void _update_render_target(RenderTarget *rt); + void _create_render_target_backbuffer(RenderTarget *rt); + void _render_target_allocate_sdf(RenderTarget *rt); + void _render_target_clear_sdf(RenderTarget *rt); + Rect2i _render_target_get_sdf_rect(const RenderTarget *rt) const; + + struct RenderTargetSDF { + enum { + SHADER_LOAD, + SHADER_LOAD_SHRINK, + SHADER_PROCESS, + SHADER_PROCESS_OPTIMIZED, + SHADER_STORE, + SHADER_STORE_SHRINK, + SHADER_MAX + }; + + struct PushConstant { + int32_t size[2]; + int32_t stride; + int32_t shift; + int32_t base_size[2]; + int32_t pad[2]; + }; + + CanvasSdfShaderRD shader; + RID shader_version; + RID pipelines[SHADER_MAX]; + } rt_sdf; + + /* GLOBAL SHADER VARIABLES */ + + struct GlobalVariables { + enum { + BUFFER_DIRTY_REGION_SIZE = 1024 + }; + struct Variable { + Set<RID> texture_materials; // materials using this + + RS::GlobalVariableType type; + Variant value; + Variant override; + int32_t buffer_index; //for vectors + int32_t buffer_elements; //for vectors + }; + + HashMap<StringName, Variable> variables; + + struct Value { + float x; + float y; + float z; + float w; + }; + + struct ValueInt { + int32_t x; + int32_t y; + int32_t z; + int32_t w; + }; + + struct ValueUInt { + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t w; + }; + + struct ValueUsage { + uint32_t elements = 0; + }; + + List<RID> materials_using_buffer; + List<RID> materials_using_texture; + + RID buffer; + Value *buffer_values; + ValueUsage *buffer_usage; + bool *buffer_dirty_regions; + uint32_t buffer_dirty_region_count = 0; + + uint32_t buffer_size; + + bool must_update_texture_materials = false; + bool must_update_buffer_materials = false; + + HashMap<RID, int32_t> instance_buffer_pos; + + } global_variables; + + int32_t _global_variable_allocate(uint32_t p_elements); + void _global_variable_store_in_buffer(int32_t p_index, RS::GlobalVariableType p_type, const Variant &p_value); + void _global_variable_mark_buffer_dirty(int32_t p_index, int32_t p_elements); + + void _update_global_variables(); + /* EFFECTS */ + + EffectsRD effects; + +public: + virtual bool can_create_resources_async() const; + + /* TEXTURE API */ + + virtual RID texture_allocate(); + + virtual void texture_2d_initialize(RID p_texture, const Ref<Image> &p_image); + virtual void texture_2d_layered_initialize(RID p_texture, const Vector<Ref<Image>> &p_layers, RS::TextureLayeredType p_layered_type); + virtual void texture_3d_initialize(RID p_texture, Image::Format p_format, int p_width, int p_height, int p_depth, bool p_mipmaps, const Vector<Ref<Image>> &p_data); //all slices, then all the mipmaps, must be coherent + virtual void texture_proxy_initialize(RID p_texture, RID p_base); + + virtual void _texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer, bool p_immediate); + + virtual void texture_2d_update_immediate(RID p_texture, const Ref<Image> &p_image, int p_layer = 0); //mostly used for video and streaming + virtual void texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer = 0); + virtual void texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data); + virtual void texture_proxy_update(RID p_texture, RID p_proxy_to); + + //these two APIs can be used together or in combination with the others. + virtual void texture_2d_placeholder_initialize(RID p_texture); + virtual void texture_2d_layered_placeholder_initialize(RID p_texture, RenderingServer::TextureLayeredType p_layered_type); + virtual void texture_3d_placeholder_initialize(RID p_texture); + + virtual Ref<Image> texture_2d_get(RID p_texture) const; + virtual Ref<Image> texture_2d_layer_get(RID p_texture, int p_layer) const; + virtual Vector<Ref<Image>> texture_3d_get(RID p_texture) const; + + virtual void texture_replace(RID p_texture, RID p_by_texture); + virtual void texture_set_size_override(RID p_texture, int p_width, int p_height); + + virtual void texture_set_path(RID p_texture, const String &p_path); + virtual String texture_get_path(RID p_texture) const; + + virtual void texture_set_detect_3d_callback(RID p_texture, RS::TextureDetectCallback p_callback, void *p_userdata); + virtual void texture_set_detect_normal_callback(RID p_texture, RS::TextureDetectCallback p_callback, void *p_userdata); + virtual void texture_set_detect_roughness_callback(RID p_texture, RS::TextureDetectRoughnessCallback p_callback, void *p_userdata); + + virtual void texture_debug_usage(List<RS::TextureInfo> *r_info); + + virtual void texture_set_proxy(RID p_proxy, RID p_base); + virtual void texture_set_force_redraw_if_visible(RID p_texture, bool p_enable); + + virtual Size2 texture_size_with_proxy(RID p_proxy); + + virtual void texture_add_to_decal_atlas(RID p_texture, bool p_panorama_to_dp = false); + virtual void texture_remove_from_decal_atlas(RID p_texture, bool p_panorama_to_dp = false); + + RID decal_atlas_get_texture() const; + RID decal_atlas_get_texture_srgb() const; + _FORCE_INLINE_ Rect2 decal_atlas_get_texture_rect(RID p_texture) { + DecalAtlas::Texture *t = decal_atlas.textures.getptr(p_texture); + if (!t) { + return Rect2(); + } + + return t->uv_rect; + } + + //internal usage + + _FORCE_INLINE_ RID texture_get_rd_texture(RID p_texture, bool p_srgb = false) { + if (p_texture.is_null()) { + return RID(); + } + Texture *tex = texture_owner.getornull(p_texture); + + if (!tex) { + return RID(); + } + return (p_srgb && tex->rd_texture_srgb.is_valid()) ? tex->rd_texture_srgb : tex->rd_texture; + } + + _FORCE_INLINE_ Size2i texture_2d_get_size(RID p_texture) { + if (p_texture.is_null()) { + return Size2i(); + } + Texture *tex = texture_owner.getornull(p_texture); + + if (!tex) { + return Size2i(); + } + return Size2i(tex->width_2d, tex->height_2d); + } + + _FORCE_INLINE_ RID texture_rd_get_default(DefaultRDTexture p_texture) { + return default_rd_textures[p_texture]; + } + _FORCE_INLINE_ RID sampler_rd_get_default(RS::CanvasItemTextureFilter p_filter, RS::CanvasItemTextureRepeat p_repeat) { + return default_rd_samplers[p_filter][p_repeat]; + } + + /* CANVAS TEXTURE API */ + + RID canvas_texture_allocate(); + void canvas_texture_initialize(RID p_canvas_texture); + + virtual void canvas_texture_set_channel(RID p_canvas_texture, RS::CanvasTextureChannel p_channel, RID p_texture); + virtual void canvas_texture_set_shading_parameters(RID p_canvas_texture, const Color &p_specular_color, float p_shininess); + + virtual void canvas_texture_set_texture_filter(RID p_canvas_texture, RS::CanvasItemTextureFilter p_filter); + virtual void canvas_texture_set_texture_repeat(RID p_canvas_texture, RS::CanvasItemTextureRepeat p_repeat); + + bool canvas_texture_get_uniform_set(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, RID p_base_shader, int p_base_set, RID &r_uniform_set, Size2i &r_size, Color &r_specular_shininess, bool &r_use_normal, bool &r_use_specular); + + /* SHADER API */ + + RID shader_allocate(); + void shader_initialize(RID p_shader); + + void shader_set_code(RID p_shader, const String &p_code); + String shader_get_code(RID p_shader) const; + void shader_get_param_list(RID p_shader, List<PropertyInfo> *p_param_list) const; + + void shader_set_default_texture_param(RID p_shader, const StringName &p_name, RID p_texture); + RID shader_get_default_texture_param(RID p_shader, const StringName &p_name) const; + Variant shader_get_param_default(RID p_shader, const StringName &p_param) const; + void shader_set_data_request_function(ShaderType p_shader_type, ShaderDataRequestFunction p_function); + + virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const; + + /* COMMON MATERIAL API */ + + RID material_allocate(); + void material_initialize(RID p_material); + + void material_set_shader(RID p_material, RID p_shader); + + void material_set_param(RID p_material, const StringName &p_param, const Variant &p_value); + Variant material_get_param(RID p_material, const StringName &p_param) const; + + void material_set_next_pass(RID p_material, RID p_next_material); + void material_set_render_priority(RID p_material, int priority); + + bool material_is_animated(RID p_material); + bool material_casts_shadows(RID p_material); + + void material_get_instance_shader_parameters(RID p_material, List<InstanceShaderParam> *r_parameters); + + void material_update_dependency(RID p_material, DependencyTracker *p_instance); + void material_force_update_textures(RID p_material, ShaderType p_shader_type); + + void material_set_data_request_function(ShaderType p_shader_type, MaterialDataRequestFunction p_function); + + _FORCE_INLINE_ uint32_t material_get_shader_id(RID p_material) { + Material *material = material_owner.getornull(p_material); + return material->shader_id; + } + + _FORCE_INLINE_ MaterialData *material_get_data(RID p_material, ShaderType p_shader_type) { + Material *material = material_owner.getornull(p_material); + if (!material || material->shader_type != p_shader_type) { + return nullptr; + } else { + return material->data; + } + } + + /* MESH API */ + + RID mesh_allocate(); + void mesh_initialize(RID p_mesh); + + virtual void mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count); + + /// Return stride + virtual void mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface); + + virtual int mesh_get_blend_shape_count(RID p_mesh) const; + + virtual void mesh_set_blend_shape_mode(RID p_mesh, RS::BlendShapeMode p_mode); + virtual RS::BlendShapeMode mesh_get_blend_shape_mode(RID p_mesh) const; + + virtual void mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const Vector<uint8_t> &p_data); + + virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material); + virtual RID mesh_surface_get_material(RID p_mesh, int p_surface) const; + + virtual RS::SurfaceData mesh_get_surface(RID p_mesh, int p_surface) const; + + virtual int mesh_get_surface_count(RID p_mesh) const; + + virtual void mesh_set_custom_aabb(RID p_mesh, const AABB &p_aabb); + virtual AABB mesh_get_custom_aabb(RID p_mesh) const; + + virtual AABB mesh_get_aabb(RID p_mesh, RID p_skeleton = RID()); + virtual void mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh); + + virtual void mesh_clear(RID p_mesh); + + virtual bool mesh_needs_instance(RID p_mesh, bool p_has_skeleton); + + /* MESH INSTANCE */ + + virtual RID mesh_instance_create(RID p_base); + virtual void mesh_instance_set_skeleton(RID p_mesh_instance, RID p_skeleton); + virtual void mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int p_shape, float p_weight); + virtual void mesh_instance_check_for_update(RID p_mesh_instance); + virtual void update_mesh_instances(); + + _FORCE_INLINE_ const RID *mesh_get_surface_count_and_materials(RID p_mesh, uint32_t &r_surface_count) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, nullptr); + r_surface_count = mesh->surface_count; + if (r_surface_count == 0) { + return nullptr; + } + if (mesh->material_cache.is_empty()) { + mesh->material_cache.resize(mesh->surface_count); + for (uint32_t i = 0; i < r_surface_count; i++) { + mesh->material_cache.write[i] = mesh->surfaces[i]->material; + } + } + + return mesh->material_cache.ptr(); + } + + _FORCE_INLINE_ void *mesh_get_surface(RID p_mesh, uint32_t p_surface_index) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, nullptr); + ERR_FAIL_UNSIGNED_INDEX_V(p_surface_index, mesh->surface_count, nullptr); + + return mesh->surfaces[p_surface_index]; + } + + _FORCE_INLINE_ RID mesh_get_shadow_mesh(RID p_mesh) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, RID()); + + return mesh->shadow_mesh; + } + + _FORCE_INLINE_ RS::PrimitiveType mesh_surface_get_primitive(void *p_surface) { + Mesh::Surface *surface = reinterpret_cast<Mesh::Surface *>(p_surface); + return surface->primitive; + } + + _FORCE_INLINE_ bool mesh_surface_has_lod(void *p_surface) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + return s->lod_count > 0; + } + + _FORCE_INLINE_ uint32_t mesh_surface_get_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + int32_t current_lod = -1; + for (uint32_t i = 0; i < s->lod_count; i++) { + float screen_size = s->lods[i].edge_length * p_model_scale / p_distance_threshold; + if (screen_size > p_lod_threshold) { + break; + } + current_lod = i; + } + if (current_lod == -1) { + return 0; + } else { + return current_lod + 1; + } + } + + _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface, uint32_t p_lod) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + if (p_lod == 0) { + return s->index_array; + } else { + return s->lods[p_lod - 1].index_array; + } + } + + _FORCE_INLINE_ void mesh_surface_get_vertex_arrays_and_format(void *p_surface, uint32_t p_input_mask, RID &r_vertex_array_rd, RD::VertexFormatID &r_vertex_format) { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + s->version_lock.lock(); + + //there will never be more than, at much, 3 or 4 versions, so iterating is the fastest way + + for (uint32_t i = 0; i < s->version_count; i++) { + if (s->versions[i].input_mask != p_input_mask) { + continue; + } + //we have this version, hooray + r_vertex_format = s->versions[i].vertex_format; + r_vertex_array_rd = s->versions[i].vertex_array; + s->version_lock.unlock(); + return; + } + + uint32_t version = s->version_count; + s->version_count++; + s->versions = (Mesh::Surface::Version *)memrealloc(s->versions, sizeof(Mesh::Surface::Version) * s->version_count); + + _mesh_surface_generate_version_for_input_mask(s->versions[version], s, p_input_mask); + + r_vertex_format = s->versions[version].vertex_format; + r_vertex_array_rd = s->versions[version].vertex_array; + + s->version_lock.unlock(); + } + + _FORCE_INLINE_ void mesh_instance_surface_get_vertex_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint32_t p_input_mask, RID &r_vertex_array_rd, RD::VertexFormatID &r_vertex_format) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + ERR_FAIL_COND(!mi); + Mesh *mesh = mi->mesh; + ERR_FAIL_UNSIGNED_INDEX(p_surface_index, mesh->surface_count); + + MeshInstance::Surface *mis = &mi->surfaces[p_surface_index]; + Mesh::Surface *s = mesh->surfaces[p_surface_index]; + + s->version_lock.lock(); + + //there will never be more than, at much, 3 or 4 versions, so iterating is the fastest way + + for (uint32_t i = 0; i < mis->version_count; i++) { + if (mis->versions[i].input_mask != p_input_mask) { + continue; + } + //we have this version, hooray + r_vertex_format = mis->versions[i].vertex_format; + r_vertex_array_rd = mis->versions[i].vertex_array; + s->version_lock.unlock(); + return; + } + + uint32_t version = mis->version_count; + mis->version_count++; + mis->versions = (Mesh::Surface::Version *)memrealloc(mis->versions, sizeof(Mesh::Surface::Version) * mis->version_count); + + _mesh_surface_generate_version_for_input_mask(mis->versions[version], s, p_input_mask, mis); + + r_vertex_format = mis->versions[version].vertex_format; + r_vertex_array_rd = mis->versions[version].vertex_array; + + s->version_lock.unlock(); + } + + _FORCE_INLINE_ RID mesh_get_default_rd_buffer(DefaultRDBuffer p_buffer) { + ERR_FAIL_INDEX_V(p_buffer, DEFAULT_RD_BUFFER_MAX, RID()); + return mesh_default_rd_buffers[p_buffer]; + } + + _FORCE_INLINE_ uint32_t mesh_surface_get_render_pass_index(RID p_mesh, uint32_t p_surface_index, uint64_t p_render_pass, uint32_t *r_index) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + Mesh::Surface *s = mesh->surfaces[p_surface_index]; + + if (s->render_pass != p_render_pass) { + (*r_index)++; + s->render_pass = p_render_pass; + s->render_index = *r_index; + } + + return s->render_index; + } + + _FORCE_INLINE_ uint32_t mesh_surface_get_multimesh_render_pass_index(RID p_mesh, uint32_t p_surface_index, uint64_t p_render_pass, uint32_t *r_index) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + Mesh::Surface *s = mesh->surfaces[p_surface_index]; + + if (s->multimesh_render_pass != p_render_pass) { + (*r_index)++; + s->multimesh_render_pass = p_render_pass; + s->multimesh_render_index = *r_index; + } + + return s->multimesh_render_index; + } + + _FORCE_INLINE_ uint32_t mesh_surface_get_particles_render_pass_index(RID p_mesh, uint32_t p_surface_index, uint64_t p_render_pass, uint32_t *r_index) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + Mesh::Surface *s = mesh->surfaces[p_surface_index]; + + if (s->particles_render_pass != p_render_pass) { + (*r_index)++; + s->particles_render_pass = p_render_pass; + s->particles_render_index = *r_index; + } + + return s->particles_render_index; + } + + /* MULTIMESH API */ + + RID multimesh_allocate(); + void multimesh_initialize(RID p_multimesh); + + void multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors = false, bool p_use_custom_data = false); + int multimesh_get_instance_count(RID p_multimesh) const; + + void multimesh_set_mesh(RID p_multimesh, RID p_mesh); + void multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform &p_transform); + void multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform); + void multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color); + void multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color); + + RID multimesh_get_mesh(RID p_multimesh) const; + + Transform multimesh_instance_get_transform(RID p_multimesh, int p_index) const; + Transform2D multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const; + Color multimesh_instance_get_color(RID p_multimesh, int p_index) const; + Color multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const; + + void multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer); + Vector<float> multimesh_get_buffer(RID p_multimesh) const; + + void multimesh_set_visible_instances(RID p_multimesh, int p_visible); + int multimesh_get_visible_instances(RID p_multimesh) const; + + AABB multimesh_get_aabb(RID p_multimesh) const; + + _FORCE_INLINE_ RS::MultimeshTransformFormat multimesh_get_transform_format(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + return multimesh->xform_format; + } + + _FORCE_INLINE_ bool multimesh_uses_colors(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + return multimesh->uses_colors; + } + + _FORCE_INLINE_ bool multimesh_uses_custom_data(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + return multimesh->uses_custom_data; + } + + _FORCE_INLINE_ uint32_t multimesh_get_instances_to_draw(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + if (multimesh->visible_instances >= 0) { + return multimesh->visible_instances; + } + return multimesh->instances; + } + + _FORCE_INLINE_ RID multimesh_get_3d_uniform_set(RID p_multimesh, RID p_shader, uint32_t p_set) const { + MultiMesh *multimesh = multimesh_owner.getornull(p_multimesh); + if (!multimesh->uniform_set_3d.is_valid()) { + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(multimesh->buffer); + uniforms.push_back(u); + multimesh->uniform_set_3d = RD::get_singleton()->uniform_set_create(uniforms, p_shader, p_set); + } + + return multimesh->uniform_set_3d; + } + + /* IMMEDIATE API */ + + RID immediate_allocate() { return RID(); } + void immediate_initialize(RID p_immediate) {} + + virtual void immediate_begin(RID p_immediate, RS::PrimitiveType p_rimitive, RID p_texture = RID()) {} + virtual void immediate_vertex(RID p_immediate, const Vector3 &p_vertex) {} + virtual void immediate_normal(RID p_immediate, const Vector3 &p_normal) {} + virtual void immediate_tangent(RID p_immediate, const Plane &p_tangent) {} + virtual void immediate_color(RID p_immediate, const Color &p_color) {} + virtual void immediate_uv(RID p_immediate, const Vector2 &tex_uv) {} + virtual void immediate_uv2(RID p_immediate, const Vector2 &tex_uv) {} + virtual void immediate_end(RID p_immediate) {} + virtual void immediate_clear(RID p_immediate) {} + virtual void immediate_set_material(RID p_immediate, RID p_material) {} + virtual RID immediate_get_material(RID p_immediate) const { return RID(); } + virtual AABB immediate_get_aabb(RID p_immediate) const { return AABB(); } + + /* SKELETON API */ + + RID skeleton_allocate(); + void skeleton_initialize(RID p_skeleton); + + void skeleton_allocate_data(RID p_skeleton, int p_bones, bool p_2d_skeleton = false); + void skeleton_set_base_transform_2d(RID p_skeleton, const Transform2D &p_base_transform); + void skeleton_set_world_transform(RID p_skeleton, bool p_enable, const Transform &p_world_transform); + int skeleton_get_bone_count(RID p_skeleton) const; + void skeleton_bone_set_transform(RID p_skeleton, int p_bone, const Transform &p_transform); + Transform skeleton_bone_get_transform(RID p_skeleton, int p_bone) const; + void skeleton_bone_set_transform_2d(RID p_skeleton, int p_bone, const Transform2D &p_transform); + Transform2D skeleton_bone_get_transform_2d(RID p_skeleton, int p_bone) const; + + _FORCE_INLINE_ bool skeleton_is_valid(RID p_skeleton) { + return skeleton_owner.getornull(p_skeleton) != nullptr; + } + + _FORCE_INLINE_ RID skeleton_get_3d_uniform_set(RID p_skeleton, RID p_shader, uint32_t p_set) const { + Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + ERR_FAIL_COND_V(!skeleton, RID()); + ERR_FAIL_COND_V(skeleton->size == 0, RID()); + if (skeleton->use_2d) { + return RID(); + } + if (!skeleton->uniform_set_3d.is_valid()) { + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(skeleton->buffer); + uniforms.push_back(u); + skeleton->uniform_set_3d = RD::get_singleton()->uniform_set_create(uniforms, p_shader, p_set); + } + + return skeleton->uniform_set_3d; + } + /* Light API */ + + void _light_initialize(RID p_rid, RS::LightType p_type); + + RID directional_light_allocate(); + void directional_light_initialize(RID p_light); + + RID omni_light_allocate(); + void omni_light_initialize(RID p_light); + + RID spot_light_allocate(); + void spot_light_initialize(RID p_light); + + void light_set_color(RID p_light, const Color &p_color); + void light_set_param(RID p_light, RS::LightParam p_param, float p_value); + void light_set_shadow(RID p_light, bool p_enabled); + void light_set_shadow_color(RID p_light, const Color &p_color); + void light_set_projector(RID p_light, RID p_texture); + void light_set_negative(RID p_light, bool p_enable); + void light_set_cull_mask(RID p_light, uint32_t p_mask); + void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled); + void light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode); + void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade); + + void light_omni_set_shadow_mode(RID p_light, RS::LightOmniShadowMode p_mode); + + void light_directional_set_shadow_mode(RID p_light, RS::LightDirectionalShadowMode p_mode); + void light_directional_set_blend_splits(RID p_light, bool p_enable); + bool light_directional_get_blend_splits(RID p_light) const; + void light_directional_set_sky_only(RID p_light, bool p_sky_only); + bool light_directional_is_sky_only(RID p_light) const; + void light_directional_set_shadow_depth_range_mode(RID p_light, RS::LightDirectionalShadowDepthRangeMode p_range_mode); + RS::LightDirectionalShadowDepthRangeMode light_directional_get_shadow_depth_range_mode(RID p_light) const; + + RS::LightDirectionalShadowMode light_directional_get_shadow_mode(RID p_light); + RS::LightOmniShadowMode light_omni_get_shadow_mode(RID p_light); + + _FORCE_INLINE_ RS::LightType light_get_type(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RS::LIGHT_DIRECTIONAL); + + return light->type; + } + AABB light_get_aabb(RID p_light) const; + + _FORCE_INLINE_ float light_get_param(RID p_light, RS::LightParam p_param) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, 0); + + return light->param[p_param]; + } + + _FORCE_INLINE_ RID light_get_projector(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RID()); + + return light->projector; + } + + _FORCE_INLINE_ Color light_get_color(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, Color()); + + return light->color; + } + + _FORCE_INLINE_ Color light_get_shadow_color(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, Color()); + + return light->shadow_color; + } + + _FORCE_INLINE_ uint32_t light_get_cull_mask(RID p_light) { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, 0); + + return light->cull_mask; + } + + _FORCE_INLINE_ bool light_has_shadow(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RS::LIGHT_DIRECTIONAL); + + return light->shadow; + } + + _FORCE_INLINE_ bool light_is_negative(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, RS::LIGHT_DIRECTIONAL); + + return light->negative; + } + + _FORCE_INLINE_ float light_get_transmittance_bias(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, 0.0); + + return light->param[RS::LIGHT_PARAM_TRANSMITTANCE_BIAS]; + } + + _FORCE_INLINE_ float light_get_shadow_volumetric_fog_fade(RID p_light) const { + const Light *light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light, 0.0); + + return light->param[RS::LIGHT_PARAM_SHADOW_VOLUMETRIC_FOG_FADE]; + } + + RS::LightBakeMode light_get_bake_mode(RID p_light); + uint32_t light_get_max_sdfgi_cascade(RID p_light); + uint64_t light_get_version(RID p_light) const; + + /* PROBE API */ + + RID reflection_probe_allocate(); + void reflection_probe_initialize(RID p_reflection_probe); + + void reflection_probe_set_update_mode(RID p_probe, RS::ReflectionProbeUpdateMode p_mode); + void reflection_probe_set_intensity(RID p_probe, float p_intensity); + void reflection_probe_set_ambient_mode(RID p_probe, RS::ReflectionProbeAmbientMode p_mode); + void reflection_probe_set_ambient_color(RID p_probe, const Color &p_color); + void reflection_probe_set_ambient_energy(RID p_probe, float p_energy); + void reflection_probe_set_max_distance(RID p_probe, float p_distance); + void reflection_probe_set_extents(RID p_probe, const Vector3 &p_extents); + void reflection_probe_set_origin_offset(RID p_probe, const Vector3 &p_offset); + void reflection_probe_set_as_interior(RID p_probe, bool p_enable); + void reflection_probe_set_enable_box_projection(RID p_probe, bool p_enable); + void reflection_probe_set_enable_shadows(RID p_probe, bool p_enable); + void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers); + void reflection_probe_set_resolution(RID p_probe, int p_resolution); + void reflection_probe_set_lod_threshold(RID p_probe, float p_ratio); + + AABB reflection_probe_get_aabb(RID p_probe) const; + RS::ReflectionProbeUpdateMode reflection_probe_get_update_mode(RID p_probe) const; + uint32_t reflection_probe_get_cull_mask(RID p_probe) const; + Vector3 reflection_probe_get_extents(RID p_probe) const; + Vector3 reflection_probe_get_origin_offset(RID p_probe) const; + float reflection_probe_get_origin_max_distance(RID p_probe) const; + float reflection_probe_get_lod_threshold(RID p_probe) const; + + int reflection_probe_get_resolution(RID p_probe) const; + bool reflection_probe_renders_shadows(RID p_probe) const; + + float reflection_probe_get_intensity(RID p_probe) const; + bool reflection_probe_is_interior(RID p_probe) const; + bool reflection_probe_is_box_projection(RID p_probe) const; + RS::ReflectionProbeAmbientMode reflection_probe_get_ambient_mode(RID p_probe) const; + Color reflection_probe_get_ambient_color(RID p_probe) const; + float reflection_probe_get_ambient_color_energy(RID p_probe) const; + + void base_update_dependency(RID p_base, DependencyTracker *p_instance); + void skeleton_update_dependency(RID p_skeleton, DependencyTracker *p_instance); + + /* DECAL API */ + + RID decal_allocate(); + void decal_initialize(RID p_decal); + + virtual void decal_set_extents(RID p_decal, const Vector3 &p_extents); + virtual void decal_set_texture(RID p_decal, RS::DecalTexture p_type, RID p_texture); + virtual void decal_set_emission_energy(RID p_decal, float p_energy); + virtual void decal_set_albedo_mix(RID p_decal, float p_mix); + virtual void decal_set_modulate(RID p_decal, const Color &p_modulate); + virtual void decal_set_cull_mask(RID p_decal, uint32_t p_layers); + virtual void decal_set_distance_fade(RID p_decal, bool p_enabled, float p_begin, float p_length); + virtual void decal_set_fade(RID p_decal, float p_above, float p_below); + virtual void decal_set_normal_fade(RID p_decal, float p_fade); + + _FORCE_INLINE_ Vector3 decal_get_extents(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->extents; + } + + _FORCE_INLINE_ RID decal_get_texture(RID p_decal, RS::DecalTexture p_texture) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->textures[p_texture]; + } + + _FORCE_INLINE_ Color decal_get_modulate(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->modulate; + } + + _FORCE_INLINE_ float decal_get_emission_energy(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->emission_energy; + } + + _FORCE_INLINE_ float decal_get_albedo_mix(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->albedo_mix; + } + + _FORCE_INLINE_ uint32_t decal_get_cull_mask(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->cull_mask; + } + + _FORCE_INLINE_ float decal_get_upper_fade(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->upper_fade; + } + + _FORCE_INLINE_ float decal_get_lower_fade(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->lower_fade; + } + + _FORCE_INLINE_ float decal_get_normal_fade(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->normal_fade; + } + + _FORCE_INLINE_ bool decal_is_distance_fade_enabled(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->distance_fade; + } + + _FORCE_INLINE_ float decal_get_distance_fade_begin(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->distance_fade_begin; + } + + _FORCE_INLINE_ float decal_get_distance_fade_length(RID p_decal) { + const Decal *decal = decal_owner.getornull(p_decal); + return decal->distance_fade_length; + } + + virtual AABB decal_get_aabb(RID p_decal) const; + + /* GI PROBE API */ + + RID gi_probe_allocate(); + void gi_probe_initialize(RID p_gi_probe); + + void gi_probe_allocate_data(RID p_gi_probe, const Transform &p_to_cell_xform, const AABB &p_aabb, const Vector3i &p_octree_size, const Vector<uint8_t> &p_octree_cells, const Vector<uint8_t> &p_data_cells, const Vector<uint8_t> &p_distance_field, const Vector<int> &p_level_counts); + + AABB gi_probe_get_bounds(RID p_gi_probe) const; + Vector3i gi_probe_get_octree_size(RID p_gi_probe) const; + Vector<uint8_t> gi_probe_get_octree_cells(RID p_gi_probe) const; + Vector<uint8_t> gi_probe_get_data_cells(RID p_gi_probe) const; + Vector<uint8_t> gi_probe_get_distance_field(RID p_gi_probe) const; + + Vector<int> gi_probe_get_level_counts(RID p_gi_probe) const; + Transform gi_probe_get_to_cell_xform(RID p_gi_probe) const; + + void gi_probe_set_dynamic_range(RID p_gi_probe, float p_range); + float gi_probe_get_dynamic_range(RID p_gi_probe) const; + + void gi_probe_set_propagation(RID p_gi_probe, float p_range); + float gi_probe_get_propagation(RID p_gi_probe) const; + + void gi_probe_set_energy(RID p_gi_probe, float p_energy); + float gi_probe_get_energy(RID p_gi_probe) const; + + void gi_probe_set_ao(RID p_gi_probe, float p_ao); + float gi_probe_get_ao(RID p_gi_probe) const; + + void gi_probe_set_ao_size(RID p_gi_probe, float p_strength); + float gi_probe_get_ao_size(RID p_gi_probe) const; + + void gi_probe_set_bias(RID p_gi_probe, float p_bias); + float gi_probe_get_bias(RID p_gi_probe) const; + + void gi_probe_set_normal_bias(RID p_gi_probe, float p_range); + float gi_probe_get_normal_bias(RID p_gi_probe) const; + + void gi_probe_set_interior(RID p_gi_probe, bool p_enable); + bool gi_probe_is_interior(RID p_gi_probe) const; + + void gi_probe_set_use_two_bounces(RID p_gi_probe, bool p_enable); + bool gi_probe_is_using_two_bounces(RID p_gi_probe) const; + + void gi_probe_set_anisotropy_strength(RID p_gi_probe, float p_strength); + float gi_probe_get_anisotropy_strength(RID p_gi_probe) const; + + uint32_t gi_probe_get_version(RID p_probe); + uint32_t gi_probe_get_data_version(RID p_probe); + + RID gi_probe_get_octree_buffer(RID p_gi_probe) const; + RID gi_probe_get_data_buffer(RID p_gi_probe) const; + + RID gi_probe_get_sdf_texture(RID p_gi_probe); + + /* LIGHTMAP CAPTURE */ + + RID lightmap_allocate(); + void lightmap_initialize(RID p_lightmap); + + virtual void lightmap_set_textures(RID p_lightmap, RID p_light, bool p_uses_spherical_haromics); + virtual void lightmap_set_probe_bounds(RID p_lightmap, const AABB &p_bounds); + virtual void lightmap_set_probe_interior(RID p_lightmap, bool p_interior); + virtual void lightmap_set_probe_capture_data(RID p_lightmap, const PackedVector3Array &p_points, const PackedColorArray &p_point_sh, const PackedInt32Array &p_tetrahedra, const PackedInt32Array &p_bsp_tree); + virtual PackedVector3Array lightmap_get_probe_capture_points(RID p_lightmap) const; + virtual PackedColorArray lightmap_get_probe_capture_sh(RID p_lightmap) const; + virtual PackedInt32Array lightmap_get_probe_capture_tetrahedra(RID p_lightmap) const; + virtual PackedInt32Array lightmap_get_probe_capture_bsp_tree(RID p_lightmap) const; + virtual AABB lightmap_get_aabb(RID p_lightmap) const; + virtual bool lightmap_is_interior(RID p_lightmap) const; + virtual void lightmap_tap_sh_light(RID p_lightmap, const Vector3 &p_point, Color *r_sh); + virtual void lightmap_set_probe_capture_update_speed(float p_speed); + _FORCE_INLINE_ float lightmap_get_probe_capture_update_speed() const { + return lightmap_probe_capture_update_speed; + } + _FORCE_INLINE_ RID lightmap_get_texture(RID p_lightmap) const { + const Lightmap *lm = lightmap_owner.getornull(p_lightmap); + ERR_FAIL_COND_V(!lm, RID()); + return lm->light_texture; + } + _FORCE_INLINE_ int32_t lightmap_get_array_index(RID p_lightmap) const { + ERR_FAIL_COND_V(!using_lightmap_array, -1); //only for arrays + const Lightmap *lm = lightmap_owner.getornull(p_lightmap); + return lm->array_index; + } + _FORCE_INLINE_ bool lightmap_uses_spherical_harmonics(RID p_lightmap) const { + ERR_FAIL_COND_V(!using_lightmap_array, false); //only for arrays + const Lightmap *lm = lightmap_owner.getornull(p_lightmap); + return lm->uses_spherical_harmonics; + } + _FORCE_INLINE_ uint64_t lightmap_array_get_version() const { + ERR_FAIL_COND_V(!using_lightmap_array, 0); //only for arrays + return lightmap_array_version; + } + + _FORCE_INLINE_ int lightmap_array_get_size() const { + ERR_FAIL_COND_V(!using_lightmap_array, 0); //only for arrays + return lightmap_textures.size(); + } + + _FORCE_INLINE_ const Vector<RID> &lightmap_array_get_textures() const { + ERR_FAIL_COND_V(!using_lightmap_array, lightmap_textures); //only for arrays + return lightmap_textures; + } + + /* PARTICLES */ + + RID particles_allocate(); + void particles_initialize(RID p_particles_collision); + + void particles_set_emitting(RID p_particles, bool p_emitting); + void particles_set_amount(RID p_particles, int p_amount); + void particles_set_lifetime(RID p_particles, float p_lifetime); + void particles_set_one_shot(RID p_particles, bool p_one_shot); + void particles_set_pre_process_time(RID p_particles, float p_time); + void particles_set_explosiveness_ratio(RID p_particles, float p_ratio); + void particles_set_randomness_ratio(RID p_particles, float p_ratio); + void particles_set_custom_aabb(RID p_particles, const AABB &p_aabb); + void particles_set_speed_scale(RID p_particles, float p_scale); + void particles_set_use_local_coordinates(RID p_particles, bool p_enable); + void particles_set_process_material(RID p_particles, RID p_material); + void particles_set_fixed_fps(RID p_particles, int p_fps); + void particles_set_fractional_delta(RID p_particles, bool p_enable); + void particles_set_collision_base_size(RID p_particles, float p_size); + void particles_restart(RID p_particles); + void particles_emit(RID p_particles, const Transform &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags); + void particles_set_subemitter(RID p_particles, RID p_subemitter_particles); + + void particles_set_draw_order(RID p_particles, RS::ParticlesDrawOrder p_order); + + void particles_set_draw_passes(RID p_particles, int p_count); + void particles_set_draw_pass_mesh(RID p_particles, int p_pass, RID p_mesh); + + void particles_request_process(RID p_particles); + AABB particles_get_current_aabb(RID p_particles); + AABB particles_get_aabb(RID p_particles) const; + + void particles_set_emission_transform(RID p_particles, const Transform &p_transform); + + bool particles_get_emitting(RID p_particles); + int particles_get_draw_passes(RID p_particles) const; + RID particles_get_draw_pass_mesh(RID p_particles, int p_pass) const; + + void particles_set_view_axis(RID p_particles, const Vector3 &p_axis); + + virtual bool particles_is_inactive(RID p_particles) const; + + _FORCE_INLINE_ uint32_t particles_get_amount(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, 0); + + return particles->amount; + } + + _FORCE_INLINE_ uint32_t particles_is_using_local_coords(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, false); + + return particles->use_local_coords; + } + + _FORCE_INLINE_ RID particles_get_instance_buffer_uniform_set(RID p_particles, RID p_shader, uint32_t p_set) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, RID()); + if (particles->particles_transforms_buffer_uniform_set.is_null()) { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(particles->particle_instance_buffer); + uniforms.push_back(u); + } + + particles->particles_transforms_buffer_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, p_shader, p_set); + } + + return particles->particles_transforms_buffer_uniform_set; + } + + virtual void particles_add_collision(RID p_particles, RID p_particles_collision_instance); + virtual void particles_remove_collision(RID p_particles, RID p_particles_collision_instance); + + /* PARTICLES COLLISION */ + + RID particles_collision_allocate(); + void particles_collision_initialize(RID p_particles_collision); + + virtual void particles_collision_set_collision_type(RID p_particles_collision, RS::ParticlesCollisionType p_type); + virtual void particles_collision_set_cull_mask(RID p_particles_collision, uint32_t p_cull_mask); + virtual void particles_collision_set_sphere_radius(RID p_particles_collision, float p_radius); //for spheres + virtual void particles_collision_set_box_extents(RID p_particles_collision, const Vector3 &p_extents); //for non-spheres + virtual void particles_collision_set_attractor_strength(RID p_particles_collision, float p_strength); + virtual void particles_collision_set_attractor_directionality(RID p_particles_collision, float p_directionality); + virtual void particles_collision_set_attractor_attenuation(RID p_particles_collision, float p_curve); + virtual void particles_collision_set_field_texture(RID p_particles_collision, RID p_texture); //for SDF and vector field, heightfield is dynamic + virtual void particles_collision_height_field_update(RID p_particles_collision); //for SDF and vector field + virtual void particles_collision_set_height_field_resolution(RID p_particles_collision, RS::ParticlesCollisionHeightfieldResolution p_resolution); //for SDF and vector field + virtual AABB particles_collision_get_aabb(RID p_particles_collision) const; + virtual Vector3 particles_collision_get_extents(RID p_particles_collision) const; + virtual bool particles_collision_is_heightfield(RID p_particles_collision) const; + RID particles_collision_get_heightfield_framebuffer(RID p_particles_collision) const; + + //used from 2D and 3D + virtual RID particles_collision_instance_create(RID p_collision); + virtual void particles_collision_instance_set_transform(RID p_collision_instance, const Transform &p_transform); + virtual void particles_collision_instance_set_active(RID p_collision_instance, bool p_active); + + /* GLOBAL VARIABLES API */ + + virtual void global_variable_add(const StringName &p_name, RS::GlobalVariableType p_type, const Variant &p_value); + virtual void global_variable_remove(const StringName &p_name); + virtual Vector<StringName> global_variable_get_list() const; + + virtual void global_variable_set(const StringName &p_name, const Variant &p_value); + virtual void global_variable_set_override(const StringName &p_name, const Variant &p_value); + virtual Variant global_variable_get(const StringName &p_name) const; + virtual RS::GlobalVariableType global_variable_get_type(const StringName &p_name) const; + RS::GlobalVariableType global_variable_get_type_internal(const StringName &p_name) const; + + virtual void global_variables_load_settings(bool p_load_textures = true); + virtual void global_variables_clear(); + + virtual int32_t global_variables_instance_allocate(RID p_instance); + virtual void global_variables_instance_free(RID p_instance); + virtual void global_variables_instance_update(RID p_instance, int p_index, const Variant &p_value); + + RID global_variables_get_storage_buffer() const; + + /* RENDER TARGET API */ + + RID render_target_create(); + void render_target_set_position(RID p_render_target, int p_x, int p_y); + void render_target_set_size(RID p_render_target, int p_width, int p_height); + RID render_target_get_texture(RID p_render_target); + void render_target_set_external_texture(RID p_render_target, unsigned int p_texture_id); + void render_target_set_flag(RID p_render_target, RenderTargetFlags p_flag, bool p_value); + bool render_target_was_used(RID p_render_target); + void render_target_set_as_unused(RID p_render_target); + void render_target_copy_to_back_buffer(RID p_render_target, const Rect2i &p_region, bool p_gen_mipmaps); + void render_target_clear_back_buffer(RID p_render_target, const Rect2i &p_region, const Color &p_color); + void render_target_gen_back_buffer_mipmaps(RID p_render_target, const Rect2i &p_region); + + RID render_target_get_back_buffer_uniform_set(RID p_render_target, RID p_base_shader); + + virtual void render_target_request_clear(RID p_render_target, const Color &p_clear_color); + virtual bool render_target_is_clear_requested(RID p_render_target); + virtual Color render_target_get_clear_request_color(RID p_render_target); + virtual void render_target_disable_clear_request(RID p_render_target); + virtual void render_target_do_clear_request(RID p_render_target); + + virtual void render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale); + RID render_target_get_sdf_texture(RID p_render_target); + RID render_target_get_sdf_framebuffer(RID p_render_target); + void render_target_sdf_process(RID p_render_target); + virtual Rect2i render_target_get_sdf_rect(RID p_render_target) const; + + Size2 render_target_get_size(RID p_render_target); + RID render_target_get_rd_framebuffer(RID p_render_target); + RID render_target_get_rd_texture(RID p_render_target); + RID render_target_get_rd_backbuffer(RID p_render_target); + RID render_target_get_rd_backbuffer_framebuffer(RID p_render_target); + + RID render_target_get_framebuffer_uniform_set(RID p_render_target); + RID render_target_get_backbuffer_uniform_set(RID p_render_target); + + void render_target_set_framebuffer_uniform_set(RID p_render_target, RID p_uniform_set); + void render_target_set_backbuffer_uniform_set(RID p_render_target, RID p_uniform_set); + + RS::InstanceType get_base_type(RID p_rid) const; + + bool free(RID p_rid); + + bool has_os_feature(const String &p_feature) const; + + void update_dirty_resources(); + + void set_debug_generate_wireframes(bool p_generate) {} + + void render_info_begin_capture() {} + void render_info_end_capture() {} + int get_captured_render_info(RS::RenderInfo p_info) { return 0; } + + int get_render_info(RS::RenderInfo p_info) { return 0; } + String get_video_adapter_name() const { return String(); } + String get_video_adapter_vendor() const { return String(); } + + virtual void capture_timestamps_begin(); + virtual void capture_timestamp(const String &p_name); + virtual uint32_t get_captured_timestamps_count() const; + virtual uint64_t get_captured_timestamps_frame() const; + virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const; + virtual uint64_t get_captured_timestamp_cpu_time(uint32_t p_index) const; + virtual String get_captured_timestamp_name(uint32_t p_index) const; + + RID get_default_rd_storage_buffer() { return default_rd_storage_buffer; } + + static RendererStorageRD *base_singleton; + + EffectsRD *get_effects(); + + RendererStorageRD(); + ~RendererStorageRD(); +}; + +#endif // RASTERIZER_STORAGE_RD_H diff --git a/servers/rendering/renderer_rd/shader_compiler_rd.cpp b/servers/rendering/renderer_rd/shader_compiler_rd.cpp new file mode 100644 index 0000000000..8135d388e1 --- /dev/null +++ b/servers/rendering/renderer_rd/shader_compiler_rd.cpp @@ -0,0 +1,1519 @@ +/*************************************************************************/ +/* shader_compiler_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "shader_compiler_rd.h" + +#include "core/config/project_settings.h" +#include "core/os/os.h" +#include "renderer_storage_rd.h" +#include "servers/rendering_server.h" + +#define SL ShaderLanguage + +static String _mktab(int p_level) { + String tb; + for (int i = 0; i < p_level; i++) { + tb += "\t"; + } + + return tb; +} + +static String _typestr(SL::DataType p_type) { + String type = ShaderLanguage::get_datatype_name(p_type); + if (ShaderLanguage::is_sampler_type(p_type)) { + type = type.replace("sampler", "texture"); //we use textures instead of samplers + } + return type; +} + +static int _get_datatype_size(SL::DataType p_type) { + switch (p_type) { + case SL::TYPE_VOID: + return 0; + case SL::TYPE_BOOL: + return 4; + case SL::TYPE_BVEC2: + return 8; + case SL::TYPE_BVEC3: + return 12; + case SL::TYPE_BVEC4: + return 16; + case SL::TYPE_INT: + return 4; + case SL::TYPE_IVEC2: + return 8; + case SL::TYPE_IVEC3: + return 12; + case SL::TYPE_IVEC4: + return 16; + case SL::TYPE_UINT: + return 4; + case SL::TYPE_UVEC2: + return 8; + case SL::TYPE_UVEC3: + return 12; + case SL::TYPE_UVEC4: + return 16; + case SL::TYPE_FLOAT: + return 4; + case SL::TYPE_VEC2: + return 8; + case SL::TYPE_VEC3: + return 12; + case SL::TYPE_VEC4: + return 16; + case SL::TYPE_MAT2: + return 32; //4 * 4 + 4 * 4 + case SL::TYPE_MAT3: + return 48; // 4 * 4 + 4 * 4 + 4 * 4 + case SL::TYPE_MAT4: + return 64; + case SL::TYPE_SAMPLER2D: + return 16; + case SL::TYPE_ISAMPLER2D: + return 16; + case SL::TYPE_USAMPLER2D: + return 16; + case SL::TYPE_SAMPLER2DARRAY: + return 16; + case SL::TYPE_ISAMPLER2DARRAY: + return 16; + case SL::TYPE_USAMPLER2DARRAY: + return 16; + case SL::TYPE_SAMPLER3D: + return 16; + case SL::TYPE_ISAMPLER3D: + return 16; + case SL::TYPE_USAMPLER3D: + return 16; + case SL::TYPE_SAMPLERCUBE: + return 16; + case SL::TYPE_SAMPLERCUBEARRAY: + return 16; + case SL::TYPE_STRUCT: + return 0; + + case SL::TYPE_MAX: { + ERR_FAIL_V(0); + }; + } + + ERR_FAIL_V(0); +} + +static int _get_datatype_alignment(SL::DataType p_type) { + switch (p_type) { + case SL::TYPE_VOID: + return 0; + case SL::TYPE_BOOL: + return 4; + case SL::TYPE_BVEC2: + return 8; + case SL::TYPE_BVEC3: + return 16; + case SL::TYPE_BVEC4: + return 16; + case SL::TYPE_INT: + return 4; + case SL::TYPE_IVEC2: + return 8; + case SL::TYPE_IVEC3: + return 16; + case SL::TYPE_IVEC4: + return 16; + case SL::TYPE_UINT: + return 4; + case SL::TYPE_UVEC2: + return 8; + case SL::TYPE_UVEC3: + return 16; + case SL::TYPE_UVEC4: + return 16; + case SL::TYPE_FLOAT: + return 4; + case SL::TYPE_VEC2: + return 8; + case SL::TYPE_VEC3: + return 16; + case SL::TYPE_VEC4: + return 16; + case SL::TYPE_MAT2: + return 16; + case SL::TYPE_MAT3: + return 16; + case SL::TYPE_MAT4: + return 16; + case SL::TYPE_SAMPLER2D: + return 16; + case SL::TYPE_ISAMPLER2D: + return 16; + case SL::TYPE_USAMPLER2D: + return 16; + case SL::TYPE_SAMPLER2DARRAY: + return 16; + case SL::TYPE_ISAMPLER2DARRAY: + return 16; + case SL::TYPE_USAMPLER2DARRAY: + return 16; + case SL::TYPE_SAMPLER3D: + return 16; + case SL::TYPE_ISAMPLER3D: + return 16; + case SL::TYPE_USAMPLER3D: + return 16; + case SL::TYPE_SAMPLERCUBE: + return 16; + case SL::TYPE_SAMPLERCUBEARRAY: + return 16; + case SL::TYPE_STRUCT: + return 0; + case SL::TYPE_MAX: { + ERR_FAIL_V(0); + } + } + + ERR_FAIL_V(0); +} + +static String _interpstr(SL::DataInterpolation p_interp) { + switch (p_interp) { + case SL::INTERPOLATION_FLAT: + return "flat "; + case SL::INTERPOLATION_SMOOTH: + return ""; + } + return ""; +} + +static String _prestr(SL::DataPrecision p_pres) { + switch (p_pres) { + case SL::PRECISION_LOWP: + return "lowp "; + case SL::PRECISION_MEDIUMP: + return "mediump "; + case SL::PRECISION_HIGHP: + return "highp "; + case SL::PRECISION_DEFAULT: + return ""; + } + return ""; +} + +static String _qualstr(SL::ArgumentQualifier p_qual) { + switch (p_qual) { + case SL::ARGUMENT_QUALIFIER_IN: + return ""; + case SL::ARGUMENT_QUALIFIER_OUT: + return "out "; + case SL::ARGUMENT_QUALIFIER_INOUT: + return "inout "; + } + return ""; +} + +static String _opstr(SL::Operator p_op) { + return SL::get_operator_text(p_op); +} + +static String _mkid(const String &p_id) { + String id = "m_" + p_id.replace("__", "_dus_"); + return id.replace("__", "_dus_"); //doubleunderscore is reserved in glsl +} + +static String f2sp0(float p_float) { + String num = rtoss(p_float); + if (num.find(".") == -1 && num.find("e") == -1) { + num += ".0"; + } + return num; +} + +static String get_constant_text(SL::DataType p_type, const Vector<SL::ConstantNode::Value> &p_values) { + switch (p_type) { + case SL::TYPE_BOOL: + return p_values[0].boolean ? "true" : "false"; + case SL::TYPE_BVEC2: + case SL::TYPE_BVEC3: + case SL::TYPE_BVEC4: { + String text = "bvec" + itos(p_type - SL::TYPE_BOOL + 1) + "("; + for (int i = 0; i < p_values.size(); i++) { + if (i > 0) { + text += ","; + } + + text += p_values[i].boolean ? "true" : "false"; + } + text += ")"; + return text; + } + + case SL::TYPE_INT: + return itos(p_values[0].sint); + case SL::TYPE_IVEC2: + case SL::TYPE_IVEC3: + case SL::TYPE_IVEC4: { + String text = "ivec" + itos(p_type - SL::TYPE_INT + 1) + "("; + for (int i = 0; i < p_values.size(); i++) { + if (i > 0) { + text += ","; + } + + text += itos(p_values[i].sint); + } + text += ")"; + return text; + + } break; + case SL::TYPE_UINT: + return itos(p_values[0].uint) + "u"; + case SL::TYPE_UVEC2: + case SL::TYPE_UVEC3: + case SL::TYPE_UVEC4: { + String text = "uvec" + itos(p_type - SL::TYPE_UINT + 1) + "("; + for (int i = 0; i < p_values.size(); i++) { + if (i > 0) { + text += ","; + } + + text += itos(p_values[i].uint) + "u"; + } + text += ")"; + return text; + } break; + case SL::TYPE_FLOAT: + return f2sp0(p_values[0].real); + case SL::TYPE_VEC2: + case SL::TYPE_VEC3: + case SL::TYPE_VEC4: { + String text = "vec" + itos(p_type - SL::TYPE_FLOAT + 1) + "("; + for (int i = 0; i < p_values.size(); i++) { + if (i > 0) { + text += ","; + } + + text += f2sp0(p_values[i].real); + } + text += ")"; + return text; + + } break; + case SL::TYPE_MAT2: + case SL::TYPE_MAT3: + case SL::TYPE_MAT4: { + String text = "mat" + itos(p_type - SL::TYPE_MAT2 + 2) + "("; + for (int i = 0; i < p_values.size(); i++) { + if (i > 0) { + text += ","; + } + + text += f2sp0(p_values[i].real); + } + text += ")"; + return text; + + } break; + default: + ERR_FAIL_V(String()); + } +} + +String ShaderCompilerRD::_get_sampler_name(ShaderLanguage::TextureFilter p_filter, ShaderLanguage::TextureRepeat p_repeat) { + if (p_filter == ShaderLanguage::FILTER_DEFAULT) { + ERR_FAIL_COND_V(actions.default_filter == ShaderLanguage::FILTER_DEFAULT, String()); + p_filter = actions.default_filter; + } + if (p_repeat == ShaderLanguage::REPEAT_DEFAULT) { + ERR_FAIL_COND_V(actions.default_repeat == ShaderLanguage::REPEAT_DEFAULT, String()); + p_repeat = actions.default_repeat; + } + return actions.sampler_array_name + "[" + itos(p_filter + (p_repeat == ShaderLanguage::REPEAT_ENABLE ? ShaderLanguage::FILTER_DEFAULT : 0)) + "]"; +} + +void ShaderCompilerRD::_dump_function_deps(const SL::ShaderNode *p_node, const StringName &p_for_func, const Map<StringName, String> &p_func_code, String &r_to_add, Set<StringName> &added) { + int fidx = -1; + + for (int i = 0; i < p_node->functions.size(); i++) { + if (p_node->functions[i].name == p_for_func) { + fidx = i; + break; + } + } + + ERR_FAIL_COND(fidx == -1); + + for (Set<StringName>::Element *E = p_node->functions[fidx].uses_function.front(); E; E = E->next()) { + if (added.has(E->get())) { + continue; //was added already + } + + _dump_function_deps(p_node, E->get(), p_func_code, r_to_add, added); + + SL::FunctionNode *fnode = nullptr; + + for (int i = 0; i < p_node->functions.size(); i++) { + if (p_node->functions[i].name == E->get()) { + fnode = p_node->functions[i].function; + break; + } + } + + ERR_FAIL_COND(!fnode); + + r_to_add += "\n"; + + String header; + if (fnode->return_type == SL::TYPE_STRUCT) { + header = _mkid(fnode->return_struct_name) + " " + _mkid(fnode->name) + "("; + } else { + header = _typestr(fnode->return_type) + " " + _mkid(fnode->name) + "("; + } + for (int i = 0; i < fnode->arguments.size(); i++) { + if (i > 0) { + header += ", "; + } + if (fnode->arguments[i].is_const) { + header += "const "; + } + if (fnode->arguments[i].type == SL::TYPE_STRUCT) { + header += _qualstr(fnode->arguments[i].qualifier) + _mkid(fnode->arguments[i].type_str) + " " + _mkid(fnode->arguments[i].name); + } else { + header += _qualstr(fnode->arguments[i].qualifier) + _prestr(fnode->arguments[i].precision) + _typestr(fnode->arguments[i].type) + " " + _mkid(fnode->arguments[i].name); + } + } + + header += ")\n"; + r_to_add += header; + r_to_add += p_func_code[E->get()]; + + added.insert(E->get()); + } +} + +static String _get_global_variable_from_type_and_index(const String &p_buffer, const String &p_index, ShaderLanguage::DataType p_type) { + switch (p_type) { + case ShaderLanguage::TYPE_BOOL: { + return "(" + p_buffer + "[" + p_index + "].x != 0.0)"; + } + case ShaderLanguage::TYPE_BVEC2: { + return "(notEqual(" + p_buffer + "[" + p_index + "].xy, vec2(0.0)))"; + } + case ShaderLanguage::TYPE_BVEC3: { + return "(notEqual(" + p_buffer + "[" + p_index + "].xyz, vec3(0.0)))"; + } + case ShaderLanguage::TYPE_BVEC4: { + return "(notEqual(" + p_buffer + "[" + p_index + "].xyzw, vec4(0.0)))"; + } + case ShaderLanguage::TYPE_INT: { + return "floatBitsToInt(" + p_buffer + "[" + p_index + "].x)"; + } + case ShaderLanguage::TYPE_IVEC2: { + return "floatBitsToInt(" + p_buffer + "[" + p_index + "].xy)"; + } + case ShaderLanguage::TYPE_IVEC3: { + return "floatBitsToInt(" + p_buffer + "[" + p_index + "].xyz)"; + } + case ShaderLanguage::TYPE_IVEC4: { + return "floatBitsToInt(" + p_buffer + "[" + p_index + "].xyzw)"; + } + case ShaderLanguage::TYPE_UINT: { + return "floatBitsToUint(" + p_buffer + "[" + p_index + "].x)"; + } + case ShaderLanguage::TYPE_UVEC2: { + return "floatBitsToUint(" + p_buffer + "[" + p_index + "].xy)"; + } + case ShaderLanguage::TYPE_UVEC3: { + return "floatBitsToUint(" + p_buffer + "[" + p_index + "].xyz)"; + } + case ShaderLanguage::TYPE_UVEC4: { + return "floatBitsToUint(" + p_buffer + "[" + p_index + "].xyzw)"; + } + case ShaderLanguage::TYPE_FLOAT: { + return "(" + p_buffer + "[" + p_index + "].x)"; + } + case ShaderLanguage::TYPE_VEC2: { + return "(" + p_buffer + "[" + p_index + "].xy)"; + } + case ShaderLanguage::TYPE_VEC3: { + return "(" + p_buffer + "[" + p_index + "].xyz)"; + } + case ShaderLanguage::TYPE_VEC4: { + return "(" + p_buffer + "[" + p_index + "].xyzw)"; + } + case ShaderLanguage::TYPE_MAT2: { + return "mat2(" + p_buffer + "[" + p_index + "].xy," + p_buffer + "[" + p_index + "+1].xy)"; + } + case ShaderLanguage::TYPE_MAT3: { + return "mat3(" + p_buffer + "[" + p_index + "].xyz," + p_buffer + "[" + p_index + "+1].xyz," + p_buffer + "[" + p_index + "+2].xyz)"; + } + case ShaderLanguage::TYPE_MAT4: { + return "mat4(" + p_buffer + "[" + p_index + "].xyzw," + p_buffer + "[" + p_index + "+1].xyzw," + p_buffer + "[" + p_index + "+2].xyzw," + p_buffer + "[" + p_index + "+3].xyzw)"; + } + default: { + ERR_FAIL_V("void"); + } + } +} + +String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, GeneratedCode &r_gen_code, IdentifierActions &p_actions, const DefaultIdentifierActions &p_default_actions, bool p_assigning, bool p_use_scope) { + String code; + + switch (p_node->type) { + case SL::Node::TYPE_SHADER: { + SL::ShaderNode *pnode = (SL::ShaderNode *)p_node; + + for (int i = 0; i < pnode->render_modes.size(); i++) { + if (p_default_actions.render_mode_defines.has(pnode->render_modes[i]) && !used_rmode_defines.has(pnode->render_modes[i])) { + r_gen_code.defines.push_back(p_default_actions.render_mode_defines[pnode->render_modes[i]]); + used_rmode_defines.insert(pnode->render_modes[i]); + } + + if (p_actions.render_mode_flags.has(pnode->render_modes[i])) { + *p_actions.render_mode_flags[pnode->render_modes[i]] = true; + } + + if (p_actions.render_mode_values.has(pnode->render_modes[i])) { + Pair<int *, int> &p = p_actions.render_mode_values[pnode->render_modes[i]]; + *p.first = p.second; + } + } + + // structs + + for (int i = 0; i < pnode->vstructs.size(); i++) { + SL::StructNode *st = pnode->vstructs[i].shader_struct; + String struct_code; + + struct_code += "struct "; + struct_code += _mkid(pnode->vstructs[i].name); + struct_code += " "; + struct_code += "{\n"; + for (int j = 0; j < st->members.size(); j++) { + SL::MemberNode *m = st->members[j]; + if (m->datatype == SL::TYPE_STRUCT) { + struct_code += _mkid(m->struct_name); + } else { + struct_code += _prestr(m->precision); + struct_code += _typestr(m->datatype); + } + struct_code += " "; + struct_code += m->name; + if (m->array_size > 0) { + struct_code += "["; + struct_code += itos(m->array_size); + struct_code += "]"; + } + struct_code += ";\n"; + } + struct_code += "}"; + struct_code += ";\n"; + + r_gen_code.vertex_global += struct_code; + r_gen_code.fragment_global += struct_code; + r_gen_code.compute_global += struct_code; + } + + int max_texture_uniforms = 0; + int max_uniforms = 0; + + for (Map<StringName, SL::ShaderNode::Uniform>::Element *E = pnode->uniforms.front(); E; E = E->next()) { + if (SL::is_sampler_type(E->get().type)) { + max_texture_uniforms++; + } else { + if (E->get().scope == SL::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; //instances are indexed directly, dont need index uniforms + } + + max_uniforms++; + } + } + + r_gen_code.texture_uniforms.resize(max_texture_uniforms); + + Vector<int> uniform_sizes; + Vector<int> uniform_alignments; + Vector<StringName> uniform_defines; + uniform_sizes.resize(max_uniforms); + uniform_alignments.resize(max_uniforms); + uniform_defines.resize(max_uniforms); + bool uses_uniforms = false; + + for (Map<StringName, SL::ShaderNode::Uniform>::Element *E = pnode->uniforms.front(); E; E = E->next()) { + String ucode; + + if (E->get().scope == SL::ShaderNode::Uniform::SCOPE_INSTANCE) { + //insert, but don't generate any code. + p_actions.uniforms->insert(E->key(), E->get()); + continue; //instances are indexed directly, dont need index uniforms + } + if (SL::is_sampler_type(E->get().type)) { + ucode = "layout(set = " + itos(actions.texture_layout_set) + ", binding = " + itos(actions.base_texture_binding_index + E->get().texture_order) + ") uniform "; + } + + bool is_buffer_global = !SL::is_sampler_type(E->get().type) && E->get().scope == SL::ShaderNode::Uniform::SCOPE_GLOBAL; + + if (is_buffer_global) { + //this is an integer to index the global table + ucode += _typestr(ShaderLanguage::TYPE_UINT); + } else { + ucode += _prestr(E->get().precision); + ucode += _typestr(E->get().type); + } + + ucode += " " + _mkid(E->key()); + ucode += ";\n"; + if (SL::is_sampler_type(E->get().type)) { + r_gen_code.vertex_global += ucode; + r_gen_code.fragment_global += ucode; + r_gen_code.compute_global += ucode; + + GeneratedCode::Texture texture; + texture.name = E->key(); + texture.hint = E->get().hint; + texture.type = E->get().type; + texture.filter = E->get().filter; + texture.repeat = E->get().repeat; + texture.global = E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL; + if (texture.global) { + r_gen_code.uses_global_textures = true; + } + + r_gen_code.texture_uniforms.write[E->get().texture_order] = texture; + } else { + if (!uses_uniforms) { + r_gen_code.defines.push_back(String("#define USE_MATERIAL_UNIFORMS\n")); + uses_uniforms = true; + } + uniform_defines.write[E->get().order] = ucode; + if (is_buffer_global) { + //globals are indices into the global table + uniform_sizes.write[E->get().order] = _get_datatype_size(ShaderLanguage::TYPE_UINT); + uniform_alignments.write[E->get().order] = _get_datatype_alignment(ShaderLanguage::TYPE_UINT); + } else { + uniform_sizes.write[E->get().order] = _get_datatype_size(E->get().type); + uniform_alignments.write[E->get().order] = _get_datatype_alignment(E->get().type); + } + } + + p_actions.uniforms->insert(E->key(), E->get()); + } + + for (int i = 0; i < max_uniforms; i++) { + r_gen_code.uniforms += uniform_defines[i]; + } + +#if 1 + // add up + int offset = 0; + for (int i = 0; i < uniform_sizes.size(); i++) { + int align = offset % uniform_alignments[i]; + + if (align != 0) { + offset += uniform_alignments[i] - align; + } + + r_gen_code.uniform_offsets.push_back(offset); + + offset += uniform_sizes[i]; + } + + r_gen_code.uniform_total_size = offset; + + if (r_gen_code.uniform_total_size % 16 != 0) { //UBO sizes must be multiples of 16 + r_gen_code.uniform_total_size += 16 - (r_gen_code.uniform_total_size % 16); + } +#else + // add up + for (int i = 0; i < uniform_sizes.size(); i++) { + if (i > 0) { + int align = uniform_sizes[i - 1] % uniform_alignments[i]; + if (align != 0) { + uniform_sizes[i - 1] += uniform_alignments[i] - align; + } + + uniform_sizes[i] = uniform_sizes[i] + uniform_sizes[i - 1]; + } + } + //offset + r_gen_code.uniform_offsets.resize(uniform_sizes.size()); + for (int i = 0; i < uniform_sizes.size(); i++) { + if (i > 0) + r_gen_code.uniform_offsets[i] = uniform_sizes[i - 1]; + else + r_gen_code.uniform_offsets[i] = 0; + } + /* + for(Map<StringName,SL::ShaderNode::Uniform>::Element *E=pnode->uniforms.front();E;E=E->next()) { + if (SL::is_sampler_type(E->get().type)) { + continue; + } + + } + +*/ + if (uniform_sizes.size()) { + r_gen_code.uniform_total_size = uniform_sizes[uniform_sizes.size() - 1]; + } else { + r_gen_code.uniform_total_size = 0; + } +#endif + + uint32_t index = p_default_actions.base_varying_index; + + List<Pair<StringName, SL::ShaderNode::Varying>> var_frag_to_light; + + for (Map<StringName, SL::ShaderNode::Varying>::Element *E = pnode->varyings.front(); E; E = E->next()) { + if (E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT || E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT) { + var_frag_to_light.push_back(Pair<StringName, SL::ShaderNode::Varying>(E->key(), E->get())); + fragment_varyings.insert(E->key()); + continue; + } + + String vcode; + String interp_mode = _interpstr(E->get().interpolation); + vcode += _prestr(E->get().precision); + vcode += _typestr(E->get().type); + vcode += " " + _mkid(E->key()); + if (E->get().array_size > 0) { + vcode += "["; + vcode += itos(E->get().array_size); + vcode += "]"; + } + vcode += ";\n"; + r_gen_code.vertex_global += "layout(location=" + itos(index) + ") " + interp_mode + "out " + vcode; + r_gen_code.fragment_global += "layout(location=" + itos(index) + ") " + interp_mode + "in " + vcode; + r_gen_code.compute_global += "layout(location=" + itos(index) + ") " + interp_mode + "out " + vcode; + index++; + } + + if (var_frag_to_light.size() > 0) { + String gcode = "\n\nstruct {\n"; + for (List<Pair<StringName, SL::ShaderNode::Varying>>::Element *E = var_frag_to_light.front(); E; E = E->next()) { + gcode += "\t" + _prestr(E->get().second.precision) + _typestr(E->get().second.type) + " " + _mkid(E->get().first); + if (E->get().second.array_size > 0) { + gcode += "["; + gcode += itos(E->get().second.array_size); + gcode += "]"; + } + gcode += ";\n"; + } + gcode += "} frag_to_light;\n"; + r_gen_code.fragment_global += gcode; + } + + for (int i = 0; i < pnode->vconstants.size(); i++) { + const SL::ShaderNode::Constant &cnode = pnode->vconstants[i]; + String gcode; + gcode += "const "; + gcode += _prestr(cnode.precision); + if (cnode.type == SL::TYPE_STRUCT) { + gcode += _mkid(cnode.type_str); + } else { + gcode += _typestr(cnode.type); + } + gcode += " " + _mkid(String(cnode.name)); + if (cnode.array_size > 0) { + gcode += "["; + gcode += itos(cnode.array_size); + gcode += "]"; + } + gcode += "="; + gcode += _dump_node_code(cnode.initializer, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + gcode += ";\n"; + r_gen_code.vertex_global += gcode; + r_gen_code.fragment_global += gcode; + r_gen_code.compute_global += gcode; + } + + Map<StringName, String> function_code; + + //code for functions + for (int i = 0; i < pnode->functions.size(); i++) { + SL::FunctionNode *fnode = pnode->functions[i].function; + function = fnode; + current_func_name = fnode->name; + function_code[fnode->name] = _dump_node_code(fnode->body, p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + function = nullptr; + } + + //place functions in actual code + + Set<StringName> added_vtx; + Set<StringName> added_fragment; //share for light + Set<StringName> added_compute; //share for light + + for (int i = 0; i < pnode->functions.size(); i++) { + SL::FunctionNode *fnode = pnode->functions[i].function; + + function = fnode; + + current_func_name = fnode->name; + + if (fnode->name == vertex_name) { + _dump_function_deps(pnode, fnode->name, function_code, r_gen_code.vertex_global, added_vtx); + r_gen_code.vertex = function_code[vertex_name]; + } + + if (fnode->name == fragment_name) { + _dump_function_deps(pnode, fnode->name, function_code, r_gen_code.fragment_global, added_fragment); + r_gen_code.fragment = function_code[fragment_name]; + } + + if (fnode->name == light_name) { + _dump_function_deps(pnode, fnode->name, function_code, r_gen_code.fragment_global, added_fragment); + r_gen_code.light = function_code[light_name]; + } + + if (fnode->name == compute_name) { + _dump_function_deps(pnode, fnode->name, function_code, r_gen_code.compute_global, added_compute); + r_gen_code.compute = function_code[compute_name]; + } + + function = nullptr; + } + + //code+=dump_node_code(pnode->body,p_level); + } break; + case SL::Node::TYPE_STRUCT: { + } break; + case SL::Node::TYPE_FUNCTION: { + } break; + case SL::Node::TYPE_BLOCK: { + SL::BlockNode *bnode = (SL::BlockNode *)p_node; + + //variables + if (!bnode->single_statement) { + code += _mktab(p_level - 1) + "{\n"; + } + + for (int i = 0; i < bnode->statements.size(); i++) { + String scode = _dump_node_code(bnode->statements[i], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + + if (bnode->statements[i]->type == SL::Node::TYPE_CONTROL_FLOW || bnode->single_statement) { + code += scode; //use directly + } else { + code += _mktab(p_level) + scode + ";\n"; + } + } + if (!bnode->single_statement) { + code += _mktab(p_level - 1) + "}\n"; + } + + } break; + case SL::Node::TYPE_VARIABLE_DECLARATION: { + SL::VariableDeclarationNode *vdnode = (SL::VariableDeclarationNode *)p_node; + + String declaration; + if (vdnode->is_const) { + declaration += "const "; + } + if (vdnode->datatype == SL::TYPE_STRUCT) { + declaration += _mkid(vdnode->struct_name); + } else { + declaration += _prestr(vdnode->precision) + _typestr(vdnode->datatype); + } + for (int i = 0; i < vdnode->declarations.size(); i++) { + if (i > 0) { + declaration += ","; + } else { + declaration += " "; + } + declaration += _mkid(vdnode->declarations[i].name); + if (vdnode->declarations[i].initializer) { + declaration += "="; + declaration += _dump_node_code(vdnode->declarations[i].initializer, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + } + } + + code += declaration; + } break; + case SL::Node::TYPE_VARIABLE: { + SL::VariableNode *vnode = (SL::VariableNode *)p_node; + bool use_fragment_varying = false; + + if (current_func_name != vertex_name) { + if (p_assigning) { + if (shader->varyings.has(vnode->name)) { + use_fragment_varying = true; + } + } else { + if (fragment_varyings.has(vnode->name)) { + use_fragment_varying = true; + } + } + } + + if (p_assigning && p_actions.write_flag_pointers.has(vnode->name)) { + *p_actions.write_flag_pointers[vnode->name] = true; + } + + if (p_default_actions.usage_defines.has(vnode->name) && !used_name_defines.has(vnode->name)) { + String define = p_default_actions.usage_defines[vnode->name]; + if (define.begins_with("@")) { + define = p_default_actions.usage_defines[define.substr(1, define.length())]; + } + r_gen_code.defines.push_back(define); + used_name_defines.insert(vnode->name); + } + + if (p_actions.usage_flag_pointers.has(vnode->name) && !used_flag_pointers.has(vnode->name)) { + *p_actions.usage_flag_pointers[vnode->name] = true; + used_flag_pointers.insert(vnode->name); + } + + if (p_default_actions.renames.has(vnode->name)) { + code = p_default_actions.renames[vnode->name]; + } else { + if (shader->uniforms.has(vnode->name)) { + //its a uniform! + const ShaderLanguage::ShaderNode::Uniform &u = shader->uniforms[vnode->name]; + if (u.texture_order >= 0) { + code = _mkid(vnode->name); //texture, use as is + } else { + //a scalar or vector + if (u.scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL) { + code = actions.base_uniform_string + _mkid(vnode->name); //texture, use as is + //global variable, this means the code points to an index to the global table + code = _get_global_variable_from_type_and_index(p_default_actions.global_buffer_array_variable, code, u.type); + } else if (u.scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + //instance variable, index it as such + code = "(" + p_default_actions.instance_uniform_index_variable + "+" + itos(u.instance_index) + ")"; + code = _get_global_variable_from_type_and_index(p_default_actions.global_buffer_array_variable, code, u.type); + } else { + //regular uniform, index from UBO + code = actions.base_uniform_string + _mkid(vnode->name); + } + } + + } else { + if (use_fragment_varying) { + code = "frag_to_light."; + } + code += _mkid(vnode->name); //its something else (local var most likely) use as is + } + } + + if (vnode->name == time_name) { + if (current_func_name == vertex_name) { + r_gen_code.uses_vertex_time = true; + } + if (current_func_name == fragment_name || current_func_name == light_name) { + r_gen_code.uses_fragment_time = true; + } + } + + } break; + case SL::Node::TYPE_ARRAY_CONSTRUCT: { + SL::ArrayConstructNode *acnode = (SL::ArrayConstructNode *)p_node; + int sz = acnode->initializer.size(); + if (acnode->datatype == SL::TYPE_STRUCT) { + code += _mkid(acnode->struct_name); + } else { + code += _typestr(acnode->datatype); + } + code += "["; + code += itos(acnode->initializer.size()); + code += "]"; + code += "("; + for (int i = 0; i < sz; i++) { + code += _dump_node_code(acnode->initializer[i], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + if (i != sz - 1) { + code += ", "; + } + } + code += ")"; + } break; + case SL::Node::TYPE_ARRAY_DECLARATION: { + SL::ArrayDeclarationNode *adnode = (SL::ArrayDeclarationNode *)p_node; + String declaration; + if (adnode->is_const) { + declaration += "const "; + } + if (adnode->datatype == SL::TYPE_STRUCT) { + declaration += _mkid(adnode->struct_name); + } else { + declaration += _prestr(adnode->precision) + _typestr(adnode->datatype); + } + for (int i = 0; i < adnode->declarations.size(); i++) { + if (i > 0) { + declaration += ","; + } else { + declaration += " "; + } + declaration += _mkid(adnode->declarations[i].name); + declaration += "["; + if (adnode->size_expression != nullptr) { + declaration += _dump_node_code(adnode->size_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + } else { + declaration += itos(adnode->declarations[i].size); + } + declaration += "]"; + int sz = adnode->declarations[i].initializer.size(); + if (sz > 0) { + declaration += "="; + if (adnode->datatype == SL::TYPE_STRUCT) { + declaration += _mkid(adnode->struct_name); + } else { + declaration += _typestr(adnode->datatype); + } + declaration += "["; + declaration += itos(sz); + declaration += "]"; + declaration += "("; + for (int j = 0; j < sz; j++) { + declaration += _dump_node_code(adnode->declarations[i].initializer[j], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + if (j != sz - 1) { + declaration += ", "; + } + } + declaration += ")"; + } + } + + code += declaration; + } break; + case SL::Node::TYPE_ARRAY: { + SL::ArrayNode *anode = (SL::ArrayNode *)p_node; + bool use_fragment_varying = false; + + if (current_func_name != vertex_name) { + if (anode->assign_expression != nullptr) { + use_fragment_varying = true; + } else { + if (p_assigning) { + if (shader->varyings.has(anode->name)) { + use_fragment_varying = true; + } + } else { + if (fragment_varyings.has(anode->name)) { + use_fragment_varying = true; + } + } + } + } + + if (p_assigning && p_actions.write_flag_pointers.has(anode->name)) { + *p_actions.write_flag_pointers[anode->name] = true; + } + + if (p_default_actions.usage_defines.has(anode->name) && !used_name_defines.has(anode->name)) { + String define = p_default_actions.usage_defines[anode->name]; + if (define.begins_with("@")) { + define = p_default_actions.usage_defines[define.substr(1, define.length())]; + } + r_gen_code.defines.push_back(define); + used_name_defines.insert(anode->name); + } + + if (p_actions.usage_flag_pointers.has(anode->name) && !used_flag_pointers.has(anode->name)) { + *p_actions.usage_flag_pointers[anode->name] = true; + used_flag_pointers.insert(anode->name); + } + + if (p_default_actions.renames.has(anode->name)) { + code = p_default_actions.renames[anode->name]; + } else { + if (use_fragment_varying) { + code = "frag_to_light."; + } + code += _mkid(anode->name); + } + + if (anode->call_expression != nullptr) { + code += "."; + code += _dump_node_code(anode->call_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning, false); + } else if (anode->index_expression != nullptr) { + code += "["; + code += _dump_node_code(anode->index_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += "]"; + } else if (anode->assign_expression != nullptr) { + code += "="; + code += _dump_node_code(anode->assign_expression, p_level, r_gen_code, p_actions, p_default_actions, true, false); + } + + if (anode->name == time_name) { + if (current_func_name == vertex_name) { + r_gen_code.uses_vertex_time = true; + } + if (current_func_name == fragment_name || current_func_name == light_name) { + r_gen_code.uses_fragment_time = true; + } + } + + } break; + case SL::Node::TYPE_CONSTANT: { + SL::ConstantNode *cnode = (SL::ConstantNode *)p_node; + + if (cnode->array_size == 0) { + return get_constant_text(cnode->datatype, cnode->values); + } else { + if (cnode->get_datatype() == SL::TYPE_STRUCT) { + code += _mkid(cnode->struct_name); + } else { + code += _typestr(cnode->datatype); + } + code += "["; + code += itos(cnode->array_size); + code += "]"; + code += "("; + for (int i = 0; i < cnode->array_size; i++) { + if (i > 0) { + code += ","; + } else { + code += ""; + } + code += _dump_node_code(cnode->array_declarations[0].initializer[i], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + } + code += ")"; + } + + } break; + case SL::Node::TYPE_OPERATOR: { + SL::OperatorNode *onode = (SL::OperatorNode *)p_node; + + switch (onode->op) { + case SL::OP_ASSIGN: + case SL::OP_ASSIGN_ADD: + case SL::OP_ASSIGN_SUB: + case SL::OP_ASSIGN_MUL: + case SL::OP_ASSIGN_DIV: + case SL::OP_ASSIGN_SHIFT_LEFT: + case SL::OP_ASSIGN_SHIFT_RIGHT: + case SL::OP_ASSIGN_MOD: + case SL::OP_ASSIGN_BIT_AND: + case SL::OP_ASSIGN_BIT_OR: + case SL::OP_ASSIGN_BIT_XOR: + code = _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, true) + _opstr(onode->op) + _dump_node_code(onode->arguments[1], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + break; + case SL::OP_BIT_INVERT: + case SL::OP_NEGATE: + case SL::OP_NOT: + case SL::OP_DECREMENT: + case SL::OP_INCREMENT: + code = _opstr(onode->op) + _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + break; + case SL::OP_POST_DECREMENT: + case SL::OP_POST_INCREMENT: + code = _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + _opstr(onode->op); + break; + case SL::OP_CALL: + case SL::OP_STRUCT: + case SL::OP_CONSTRUCT: { + ERR_FAIL_COND_V(onode->arguments[0]->type != SL::Node::TYPE_VARIABLE, String()); + + SL::VariableNode *vnode = (SL::VariableNode *)onode->arguments[0]; + + bool is_texture_func = false; + if (onode->op == SL::OP_STRUCT) { + code += _mkid(vnode->name); + } else if (onode->op == SL::OP_CONSTRUCT) { + code += String(vnode->name); + } else { + if (p_actions.usage_flag_pointers.has(vnode->name) && !used_flag_pointers.has(vnode->name)) { + *p_actions.usage_flag_pointers[vnode->name] = true; + used_flag_pointers.insert(vnode->name); + } + + if (internal_functions.has(vnode->name)) { + code += vnode->name; + is_texture_func = texture_functions.has(vnode->name); + } else if (p_default_actions.renames.has(vnode->name)) { + code += p_default_actions.renames[vnode->name]; + } else { + code += _mkid(vnode->name); + } + } + + code += "("; + + for (int i = 1; i < onode->arguments.size(); i++) { + if (i > 1) { + code += ", "; + } + String node_code = _dump_node_code(onode->arguments[i], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + if (is_texture_func && i == 1 && onode->arguments[i]->type == SL::Node::TYPE_VARIABLE) { + //need to map from texture to sampler in order to sample + const SL::VariableNode *varnode = static_cast<const SL::VariableNode *>(onode->arguments[i]); + + StringName texture_uniform = varnode->name; + + String sampler_name; + + if (actions.custom_samplers.has(texture_uniform)) { + sampler_name = actions.custom_samplers[texture_uniform]; + } else { + if (shader->uniforms.has(texture_uniform)) { + sampler_name = _get_sampler_name(shader->uniforms[texture_uniform].filter, shader->uniforms[texture_uniform].repeat); + } else { + bool found = false; + + for (int j = 0; j < function->arguments.size(); j++) { + if (function->arguments[j].name == texture_uniform) { + if (function->arguments[j].tex_builtin_check) { + ERR_CONTINUE(!actions.custom_samplers.has(function->arguments[j].tex_builtin)); + sampler_name = actions.custom_samplers[function->arguments[j].tex_builtin]; + found = true; + break; + } + if (function->arguments[j].tex_argument_check) { + sampler_name = _get_sampler_name(function->arguments[j].tex_argument_filter, function->arguments[j].tex_argument_repeat); + found = true; + break; + } + } + } + if (!found) { + //function was most likely unused, so use anything (compiler will remove it anyway) + sampler_name = _get_sampler_name(ShaderLanguage::FILTER_DEFAULT, ShaderLanguage::REPEAT_DEFAULT); + } + } + } + + code += ShaderLanguage::get_datatype_name(onode->arguments[i]->get_datatype()) + "(" + node_code + ", " + sampler_name + ")"; + } else { + code += node_code; + } + } + code += ")"; + } break; + case SL::OP_INDEX: { + code += _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += "["; + code += _dump_node_code(onode->arguments[1], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += "]"; + + } break; + case SL::OP_SELECT_IF: { + code += "("; + code += _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += "?"; + code += _dump_node_code(onode->arguments[1], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += ":"; + code += _dump_node_code(onode->arguments[2], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += ")"; + + } break; + + default: { + if (p_use_scope) { + code += "("; + } + code += _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + _opstr(onode->op) + _dump_node_code(onode->arguments[1], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + if (p_use_scope) { + code += ")"; + } + break; + } + } + + } break; + case SL::Node::TYPE_CONTROL_FLOW: { + SL::ControlFlowNode *cfnode = (SL::ControlFlowNode *)p_node; + if (cfnode->flow_op == SL::FLOW_OP_IF) { + code += _mktab(p_level) + "if (" + _dump_node_code(cfnode->expressions[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + ")\n"; + code += _dump_node_code(cfnode->blocks[0], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + if (cfnode->blocks.size() == 2) { + code += _mktab(p_level) + "else\n"; + code += _dump_node_code(cfnode->blocks[1], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + } + } else if (cfnode->flow_op == SL::FLOW_OP_SWITCH) { + code += _mktab(p_level) + "switch (" + _dump_node_code(cfnode->expressions[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + ")\n"; + code += _dump_node_code(cfnode->blocks[0], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + } else if (cfnode->flow_op == SL::FLOW_OP_CASE) { + code += _mktab(p_level) + "case " + _dump_node_code(cfnode->expressions[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + ":\n"; + code += _dump_node_code(cfnode->blocks[0], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + } else if (cfnode->flow_op == SL::FLOW_OP_DEFAULT) { + code += _mktab(p_level) + "default:\n"; + code += _dump_node_code(cfnode->blocks[0], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + } else if (cfnode->flow_op == SL::FLOW_OP_DO) { + code += _mktab(p_level) + "do"; + code += _dump_node_code(cfnode->blocks[0], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + code += _mktab(p_level) + "while (" + _dump_node_code(cfnode->expressions[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + ");"; + } else if (cfnode->flow_op == SL::FLOW_OP_WHILE) { + code += _mktab(p_level) + "while (" + _dump_node_code(cfnode->expressions[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + ")\n"; + code += _dump_node_code(cfnode->blocks[0], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + } else if (cfnode->flow_op == SL::FLOW_OP_FOR) { + String left = _dump_node_code(cfnode->blocks[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + String middle = _dump_node_code(cfnode->expressions[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + String right = _dump_node_code(cfnode->expressions[1], p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += _mktab(p_level) + "for (" + left + ";" + middle + ";" + right + ")\n"; + code += _dump_node_code(cfnode->blocks[1], p_level + 1, r_gen_code, p_actions, p_default_actions, p_assigning); + + } else if (cfnode->flow_op == SL::FLOW_OP_RETURN) { + if (cfnode->expressions.size()) { + code = "return " + _dump_node_code(cfnode->expressions[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + ";"; + } else { + code = "return;"; + } + } else if (cfnode->flow_op == SL::FLOW_OP_DISCARD) { + if (p_actions.usage_flag_pointers.has("DISCARD") && !used_flag_pointers.has("DISCARD")) { + *p_actions.usage_flag_pointers["DISCARD"] = true; + used_flag_pointers.insert("DISCARD"); + } + + code = "discard;"; + } else if (cfnode->flow_op == SL::FLOW_OP_CONTINUE) { + code = "continue;"; + } else if (cfnode->flow_op == SL::FLOW_OP_BREAK) { + code = "break;"; + } + + } break; + case SL::Node::TYPE_MEMBER: { + SL::MemberNode *mnode = (SL::MemberNode *)p_node; + code = _dump_node_code(mnode->owner, p_level, r_gen_code, p_actions, p_default_actions, p_assigning) + "." + mnode->name; + if (mnode->index_expression != nullptr) { + code += "["; + code += _dump_node_code(mnode->index_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + code += "]"; + } else if (mnode->assign_expression != nullptr) { + code += "="; + code += _dump_node_code(mnode->assign_expression, p_level, r_gen_code, p_actions, p_default_actions, true, false); + } + } break; + } + + return code; +} + +ShaderLanguage::DataType ShaderCompilerRD::_get_variable_type(const StringName &p_type) { + RS::GlobalVariableType gvt = ((RendererStorageRD *)(RendererStorage::base_singleton))->global_variable_get_type_internal(p_type); + return RS::global_variable_type_get_shader_datatype(gvt); +} + +Error ShaderCompilerRD::compile(RS::ShaderMode p_mode, const String &p_code, IdentifierActions *p_actions, const String &p_path, GeneratedCode &r_gen_code) { + Error err = parser.compile(p_code, ShaderTypes::get_singleton()->get_functions(p_mode), ShaderTypes::get_singleton()->get_modes(p_mode), ShaderTypes::get_singleton()->get_types(), _get_variable_type); + + if (err != OK) { + Vector<String> shader = p_code.split("\n"); + for (int i = 0; i < shader.size(); i++) { + print_line(itos(i + 1) + " " + shader[i]); + } + + _err_print_error(nullptr, p_path.utf8().get_data(), parser.get_error_line(), parser.get_error_text().utf8().get_data(), ERR_HANDLER_SHADER); + return err; + } + + r_gen_code.defines.clear(); + r_gen_code.vertex = String(); + r_gen_code.vertex_global = String(); + r_gen_code.fragment = String(); + r_gen_code.fragment_global = String(); + r_gen_code.compute = String(); + r_gen_code.compute_global = String(); + r_gen_code.light = String(); + r_gen_code.uses_fragment_time = false; + r_gen_code.uses_vertex_time = false; + r_gen_code.uses_global_textures = false; + + used_name_defines.clear(); + used_rmode_defines.clear(); + used_flag_pointers.clear(); + fragment_varyings.clear(); + + shader = parser.get_shader(); + function = nullptr; + _dump_node_code(shader, 1, r_gen_code, *p_actions, actions, false); + + return OK; +} + +void ShaderCompilerRD::initialize(DefaultIdentifierActions p_actions) { + actions = p_actions; + + vertex_name = "vertex"; + fragment_name = "fragment"; + compute_name = "compute"; + light_name = "light"; + time_name = "TIME"; + + List<String> func_list; + + ShaderLanguage::get_builtin_funcs(&func_list); + + for (List<String>::Element *E = func_list.front(); E; E = E->next()) { + internal_functions.insert(E->get()); + } + texture_functions.insert("texture"); + texture_functions.insert("textureProj"); + texture_functions.insert("textureLod"); + texture_functions.insert("textureProjLod"); + texture_functions.insert("textureGrad"); + texture_functions.insert("textureSize"); + texture_functions.insert("texelFetch"); +} + +ShaderCompilerRD::ShaderCompilerRD() { +#if 0 + + /** SPATIAL SHADER **/ + + actions[RS::SHADER_SPATIAL].renames["WORLD_MATRIX"] = "world_transform"; + actions[RS::SHADER_SPATIAL].renames["INV_CAMERA_MATRIX"] = "camera_inverse_matrix"; + actions[RS::SHADER_SPATIAL].renames["CAMERA_MATRIX"] = "camera_matrix"; + actions[RS::SHADER_SPATIAL].renames["PROJECTION_MATRIX"] = "projection_matrix"; + actions[RS::SHADER_SPATIAL].renames["INV_PROJECTION_MATRIX"] = "inv_projection_matrix"; + actions[RS::SHADER_SPATIAL].renames["MODELVIEW_MATRIX"] = "modelview"; + + actions[RS::SHADER_SPATIAL].renames["VERTEX"] = "vertex.xyz"; + actions[RS::SHADER_SPATIAL].renames["NORMAL"] = "normal"; + actions[RS::SHADER_SPATIAL].renames["TANGENT"] = "tangent"; + actions[RS::SHADER_SPATIAL].renames["BINORMAL"] = "binormal"; + actions[RS::SHADER_SPATIAL].renames["POSITION"] = "position"; + actions[RS::SHADER_SPATIAL].renames["UV"] = "uv_interp"; + actions[RS::SHADER_SPATIAL].renames["UV2"] = "uv2_interp"; + actions[RS::SHADER_SPATIAL].renames["COLOR"] = "color_interp"; + actions[RS::SHADER_SPATIAL].renames["POINT_SIZE"] = "gl_PointSize"; + actions[RS::SHADER_SPATIAL].renames["INSTANCE_ID"] = "gl_InstanceID"; + + //builtins + + actions[RS::SHADER_SPATIAL].renames["TIME"] = "time"; + actions[RS::SHADER_SPATIAL].renames["VIEWPORT_SIZE"] = "viewport_size"; + + actions[RS::SHADER_SPATIAL].renames["FRAGCOORD"] = "gl_FragCoord"; + actions[RS::SHADER_SPATIAL].renames["FRONT_FACING"] = "gl_FrontFacing"; + actions[RS::SHADER_SPATIAL].renames["NORMAL_MAP"] = "normal_map"; + actions[RS::SHADER_SPATIAL].renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; + actions[RS::SHADER_SPATIAL].renames["ALBEDO"] = "albedo"; + actions[RS::SHADER_SPATIAL].renames["ALPHA"] = "alpha"; + actions[RS::SHADER_SPATIAL].renames["METALLIC"] = "metallic"; + actions[RS::SHADER_SPATIAL].renames["SPECULAR"] = "specular"; + actions[RS::SHADER_SPATIAL].renames["ROUGHNESS"] = "roughness"; + actions[RS::SHADER_SPATIAL].renames["RIM"] = "rim"; + actions[RS::SHADER_SPATIAL].renames["RIM_TINT"] = "rim_tint"; + actions[RS::SHADER_SPATIAL].renames["CLEARCOAT"] = "clearcoat"; + actions[RS::SHADER_SPATIAL].renames["CLEARCOAT_GLOSS"] = "clearcoat_gloss"; + actions[RS::SHADER_SPATIAL].renames["ANISOTROPY"] = "anisotropy"; + actions[RS::SHADER_SPATIAL].renames["ANISOTROPY_FLOW"] = "anisotropy_flow"; + actions[RS::SHADER_SPATIAL].renames["SSS_STRENGTH"] = "sss_strength"; + actions[RS::SHADER_SPATIAL].renames["TRANSMISSION"] = "transmission"; + actions[RS::SHADER_SPATIAL].renames["AO"] = "ao"; + actions[RS::SHADER_SPATIAL].renames["AO_LIGHT_AFFECT"] = "ao_light_affect"; + actions[RS::SHADER_SPATIAL].renames["EMISSION"] = "emission"; + actions[RS::SHADER_SPATIAL].renames["POINT_COORD"] = "gl_PointCoord"; + actions[RS::SHADER_SPATIAL].renames["INSTANCE_CUSTOM"] = "instance_custom"; + actions[RS::SHADER_SPATIAL].renames["SCREEN_UV"] = "screen_uv"; + actions[RS::SHADER_SPATIAL].renames["SCREEN_TEXTURE"] = "screen_texture"; + actions[RS::SHADER_SPATIAL].renames["DEPTH_TEXTURE"] = "depth_buffer"; + actions[RS::SHADER_SPATIAL].renames["DEPTH"] = "gl_FragDepth"; + actions[RS::SHADER_SPATIAL].renames["ALPHA_SCISSOR"] = "alpha_scissor"; + actions[RS::SHADER_SPATIAL].renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; + + //for light + actions[RS::SHADER_SPATIAL].renames["VIEW"] = "view"; + actions[RS::SHADER_SPATIAL].renames["LIGHT_COLOR"] = "light_color"; + actions[RS::SHADER_SPATIAL].renames["LIGHT"] = "light"; + actions[RS::SHADER_SPATIAL].renames["ATTENUATION"] = "attenuation"; + actions[RS::SHADER_SPATIAL].renames["DIFFUSE_LIGHT"] = "diffuse_light"; + actions[RS::SHADER_SPATIAL].renames["SPECULAR_LIGHT"] = "specular_light"; + + actions[RS::SHADER_SPATIAL].usage_defines["TANGENT"] = "#define ENABLE_TANGENT_INTERP\n"; + actions[RS::SHADER_SPATIAL].usage_defines["BINORMAL"] = "@TANGENT"; + actions[RS::SHADER_SPATIAL].usage_defines["RIM"] = "#define LIGHT_USE_RIM\n"; + actions[RS::SHADER_SPATIAL].usage_defines["RIM_TINT"] = "@RIM"; + actions[RS::SHADER_SPATIAL].usage_defines["CLEARCOAT"] = "#define LIGHT_USE_CLEARCOAT\n"; + actions[RS::SHADER_SPATIAL].usage_defines["CLEARCOAT_GLOSS"] = "@CLEARCOAT"; + actions[RS::SHADER_SPATIAL].usage_defines["ANISOTROPY"] = "#define LIGHT_USE_ANISOTROPY\n"; + actions[RS::SHADER_SPATIAL].usage_defines["ANISOTROPY_FLOW"] = "@ANISOTROPY"; + actions[RS::SHADER_SPATIAL].usage_defines["AO"] = "#define ENABLE_AO\n"; + actions[RS::SHADER_SPATIAL].usage_defines["AO_LIGHT_AFFECT"] = "#define ENABLE_AO\n"; + actions[RS::SHADER_SPATIAL].usage_defines["UV"] = "#define ENABLE_UV_INTERP\n"; + actions[RS::SHADER_SPATIAL].usage_defines["UV2"] = "#define ENABLE_UV2_INTERP\n"; + actions[RS::SHADER_SPATIAL].usage_defines["NORMAL_MAP"] = "#define ENABLE_NORMAL_MAP\n"; + actions[RS::SHADER_SPATIAL].usage_defines["NORMAL_MAP_DEPTH"] = "@NORMAL_MAP"; + actions[RS::SHADER_SPATIAL].usage_defines["COLOR"] = "#define ENABLE_COLOR_INTERP\n"; + actions[RS::SHADER_SPATIAL].usage_defines["INSTANCE_CUSTOM"] = "#define ENABLE_INSTANCE_CUSTOM\n"; + actions[RS::SHADER_SPATIAL].usage_defines["ALPHA_SCISSOR"] = "#define ALPHA_SCISSOR_USED\n"; + actions[RS::SHADER_SPATIAL].usage_defines["POSITION"] = "#define OVERRIDE_POSITION\n"; + + actions[RS::SHADER_SPATIAL].usage_defines["SSS_STRENGTH"] = "#define ENABLE_SSS\n"; + actions[RS::SHADER_SPATIAL].usage_defines["TRANSMISSION"] = "#define TRANSMISSION_USED\n"; + actions[RS::SHADER_SPATIAL].usage_defines["SCREEN_TEXTURE"] = "#define SCREEN_TEXTURE_USED\n"; + actions[RS::SHADER_SPATIAL].usage_defines["SCREEN_UV"] = "#define SCREEN_UV_USED\n"; + + actions[RS::SHADER_SPATIAL].usage_defines["DIFFUSE_LIGHT"] = "#define USE_LIGHT_SHADER_CODE\n"; + actions[RS::SHADER_SPATIAL].usage_defines["SPECULAR_LIGHT"] = "#define USE_LIGHT_SHADER_CODE\n"; + + actions[RS::SHADER_SPATIAL].render_mode_defines["skip_vertex_transform"] = "#define SKIP_TRANSFORM_USED\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["world_vertex_coords"] = "#define VERTEX_WORLD_COORDS_USED\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["ensure_correct_normals"] = "#define ENSURE_CORRECT_NORMALS\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["cull_front"] = "#define DO_SIDE_CHECK\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["cull_disabled"] = "#define DO_SIDE_CHECK\n"; + + bool force_lambert = GLOBAL_GET("rendering/shading/overrides/force_lambert_over_burley"); + + if (!force_lambert) { + actions[RS::SHADER_SPATIAL].render_mode_defines["diffuse_burley"] = "#define DIFFUSE_BURLEY\n"; + } + + actions[RS::SHADER_SPATIAL].render_mode_defines["diffuse_oren_nayar"] = "#define DIFFUSE_OREN_NAYAR\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["diffuse_lambert_wrap"] = "#define DIFFUSE_LAMBERT_WRAP\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["diffuse_toon"] = "#define DIFFUSE_TOON\n"; + + bool force_blinn = GLOBAL_GET("rendering/shading/overrides/force_blinn_over_ggx"); + + if (!force_blinn) { + actions[RS::SHADER_SPATIAL].render_mode_defines["specular_schlick_ggx"] = "#define SPECULAR_SCHLICK_GGX\n"; + } else { + actions[RS::SHADER_SPATIAL].render_mode_defines["specular_schlick_ggx"] = "#define SPECULAR_BLINN\n"; + } + + actions[RS::SHADER_SPATIAL].render_mode_defines["specular_blinn"] = "#define SPECULAR_BLINN\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["specular_phong"] = "#define SPECULAR_PHONG\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["specular_toon"] = "#define SPECULAR_TOON\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["specular_disabled"] = "#define SPECULAR_DISABLED\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["shadows_disabled"] = "#define SHADOWS_DISABLED\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["ambient_light_disabled"] = "#define AMBIENT_LIGHT_DISABLED\n"; + actions[RS::SHADER_SPATIAL].render_mode_defines["shadow_to_opacity"] = "#define USE_SHADOW_TO_OPACITY\n"; + + /* PARTICLES SHADER */ + + actions[RS::SHADER_PARTICLES].renames["COLOR"] = "out_color"; + actions[RS::SHADER_PARTICLES].renames["VELOCITY"] = "out_velocity_active.xyz"; + actions[RS::SHADER_PARTICLES].renames["MASS"] = "mass"; + actions[RS::SHADER_PARTICLES].renames["ACTIVE"] = "shader_active"; + actions[RS::SHADER_PARTICLES].renames["RESTART"] = "restart"; + actions[RS::SHADER_PARTICLES].renames["CUSTOM"] = "out_custom"; + actions[RS::SHADER_PARTICLES].renames["TRANSFORM"] = "xform"; + actions[RS::SHADER_PARTICLES].renames["TIME"] = "time"; + actions[RS::SHADER_PARTICLES].renames["LIFETIME"] = "lifetime"; + actions[RS::SHADER_PARTICLES].renames["DELTA"] = "local_delta"; + actions[RS::SHADER_PARTICLES].renames["NUMBER"] = "particle_number"; + actions[RS::SHADER_PARTICLES].renames["INDEX"] = "index"; + actions[RS::SHADER_PARTICLES].renames["GRAVITY"] = "current_gravity"; + actions[RS::SHADER_PARTICLES].renames["EMISSION_TRANSFORM"] = "emission_transform"; + actions[RS::SHADER_PARTICLES].renames["RANDOM_SEED"] = "random_seed"; + + actions[RS::SHADER_PARTICLES].render_mode_defines["disable_force"] = "#define DISABLE_FORCE\n"; + actions[RS::SHADER_PARTICLES].render_mode_defines["disable_velocity"] = "#define DISABLE_VELOCITY\n"; + actions[RS::SHADER_PARTICLES].render_mode_defines["keep_data"] = "#define ENABLE_KEEP_DATA\n"; +#endif +} diff --git a/servers/rendering/renderer_rd/shader_compiler_rd.h b/servers/rendering/renderer_rd/shader_compiler_rd.h new file mode 100644 index 0000000000..6575829e73 --- /dev/null +++ b/servers/rendering/renderer_rd/shader_compiler_rd.h @@ -0,0 +1,130 @@ +/*************************************************************************/ +/* shader_compiler_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef SHADER_COMPILER_RD_H +#define SHADER_COMPILER_RD_H + +#include "core/templates/pair.h" +#include "servers/rendering/shader_language.h" +#include "servers/rendering/shader_types.h" +#include "servers/rendering_server.h" + +class ShaderCompilerRD { +public: + struct IdentifierActions { + Map<StringName, Pair<int *, int>> render_mode_values; + Map<StringName, bool *> render_mode_flags; + Map<StringName, bool *> usage_flag_pointers; + Map<StringName, bool *> write_flag_pointers; + + Map<StringName, ShaderLanguage::ShaderNode::Uniform> *uniforms; + }; + + struct GeneratedCode { + Vector<String> defines; + struct Texture { + StringName name; + ShaderLanguage::DataType type; + ShaderLanguage::ShaderNode::Uniform::Hint hint; + ShaderLanguage::TextureFilter filter; + ShaderLanguage::TextureRepeat repeat; + bool global; + }; + + Vector<Texture> texture_uniforms; + + Vector<uint32_t> uniform_offsets; + uint32_t uniform_total_size; + String uniforms; + String vertex_global; + String vertex; + String fragment_global; + String fragment; + String light; + String compute_global; + String compute; + + bool uses_global_textures; + bool uses_fragment_time; + bool uses_vertex_time; + }; + + struct DefaultIdentifierActions { + Map<StringName, String> renames; + Map<StringName, String> render_mode_defines; + Map<StringName, String> usage_defines; + Map<StringName, String> custom_samplers; + ShaderLanguage::TextureFilter default_filter; + ShaderLanguage::TextureRepeat default_repeat; + String sampler_array_name; + int base_texture_binding_index = 0; + int texture_layout_set = 0; + String base_uniform_string; + String global_buffer_array_variable; + String instance_uniform_index_variable; + uint32_t base_varying_index = 0; + }; + +private: + ShaderLanguage parser; + + String _get_sampler_name(ShaderLanguage::TextureFilter p_filter, ShaderLanguage::TextureRepeat p_repeat); + + void _dump_function_deps(const ShaderLanguage::ShaderNode *p_node, const StringName &p_for_func, const Map<StringName, String> &p_func_code, String &r_to_add, Set<StringName> &added); + String _dump_node_code(const ShaderLanguage::Node *p_node, int p_level, GeneratedCode &r_gen_code, IdentifierActions &p_actions, const DefaultIdentifierActions &p_default_actions, bool p_assigning, bool p_scope = true); + + const ShaderLanguage::ShaderNode *shader; + const ShaderLanguage::FunctionNode *function; + StringName current_func_name; + StringName vertex_name; + StringName fragment_name; + StringName light_name; + StringName compute_name; + StringName time_name; + Set<StringName> texture_functions; + + Set<StringName> used_name_defines; + Set<StringName> used_flag_pointers; + Set<StringName> used_rmode_defines; + Set<StringName> internal_functions; + Set<StringName> fragment_varyings; + + DefaultIdentifierActions actions; + + static ShaderLanguage::DataType _get_variable_type(const StringName &p_type); + +public: + Error compile(RS::ShaderMode p_mode, const String &p_code, IdentifierActions *p_actions, const String &p_path, GeneratedCode &r_gen_code); + + void initialize(DefaultIdentifierActions p_actions); + ShaderCompilerRD(); +}; + +#endif // SHADERCOMPILERRD_H diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp new file mode 100644 index 0000000000..e4a39ff813 --- /dev/null +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -0,0 +1,638 @@ +/*************************************************************************/ +/* shader_rd.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "shader_rd.h" + +#include "core/string/string_builder.h" +#include "renderer_compositor_rd.h" +#include "servers/rendering/rendering_device.h" + +void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name) { + name = p_name; + //split vertex and shader code (thank you, shader compiler programmers from you know what company). + if (p_vertex_code) { + String defines_tag = "\nVERSION_DEFINES"; + String globals_tag = "\nVERTEX_SHADER_GLOBALS"; + String material_tag = "\nMATERIAL_UNIFORMS"; + String code_tag = "\nVERTEX_SHADER_CODE"; + String code = p_vertex_code; + + int cpos = code.find(defines_tag); + if (cpos != -1) { + vertex_codev = code.substr(0, cpos).ascii(); + code = code.substr(cpos + defines_tag.length(), code.length()); + } + + cpos = code.find(material_tag); + + if (cpos == -1) { + vertex_code0 = code.ascii(); + } else { + vertex_code0 = code.substr(0, cpos).ascii(); + code = code.substr(cpos + material_tag.length(), code.length()); + + cpos = code.find(globals_tag); + + if (cpos == -1) { + vertex_code1 = code.ascii(); + } else { + vertex_code1 = code.substr(0, cpos).ascii(); + String code2 = code.substr(cpos + globals_tag.length(), code.length()); + + cpos = code2.find(code_tag); + if (cpos == -1) { + vertex_code2 = code2.ascii(); + } else { + vertex_code2 = code2.substr(0, cpos).ascii(); + vertex_code3 = code2.substr(cpos + code_tag.length(), code2.length()).ascii(); + } + } + } + } + + if (p_fragment_code) { + String defines_tag = "\nVERSION_DEFINES"; + String globals_tag = "\nFRAGMENT_SHADER_GLOBALS"; + String material_tag = "\nMATERIAL_UNIFORMS"; + String code_tag = "\nFRAGMENT_SHADER_CODE"; + String light_code_tag = "\nLIGHT_SHADER_CODE"; + String code = p_fragment_code; + + int cpos = code.find(defines_tag); + if (cpos != -1) { + fragment_codev = code.substr(0, cpos).ascii(); + code = code.substr(cpos + defines_tag.length(), code.length()); + } + + cpos = code.find(material_tag); + if (cpos == -1) { + fragment_code0 = code.ascii(); + } else { + fragment_code0 = code.substr(0, cpos).ascii(); + //print_line("CODE0:\n"+String(fragment_code0.get_data())); + code = code.substr(cpos + material_tag.length(), code.length()); + cpos = code.find(globals_tag); + + if (cpos == -1) { + fragment_code1 = code.ascii(); + } else { + fragment_code1 = code.substr(0, cpos).ascii(); + //print_line("CODE1:\n"+String(fragment_code1.get_data())); + + String code2 = code.substr(cpos + globals_tag.length(), code.length()); + cpos = code2.find(light_code_tag); + + if (cpos == -1) { + fragment_code2 = code2.ascii(); + } else { + fragment_code2 = code2.substr(0, cpos).ascii(); + //print_line("CODE2:\n"+String(fragment_code2.get_data())); + + String code3 = code2.substr(cpos + light_code_tag.length(), code2.length()); + + cpos = code3.find(code_tag); + if (cpos == -1) { + fragment_code3 = code3.ascii(); + } else { + fragment_code3 = code3.substr(0, cpos).ascii(); + //print_line("CODE3:\n"+String(fragment_code3.get_data())); + fragment_code4 = code3.substr(cpos + code_tag.length(), code3.length()).ascii(); + //print_line("CODE4:\n"+String(fragment_code4.get_data())); + } + } + } + } + } + + if (p_compute_code) { + is_compute = true; + + String defines_tag = "\nVERSION_DEFINES"; + String globals_tag = "\nCOMPUTE_SHADER_GLOBALS"; + String material_tag = "\nMATERIAL_UNIFORMS"; + String code_tag = "\nCOMPUTE_SHADER_CODE"; + String code = p_compute_code; + + int cpos = code.find(defines_tag); + if (cpos != -1) { + compute_codev = code.substr(0, cpos).ascii(); + code = code.substr(cpos + defines_tag.length(), code.length()); + } + + cpos = code.find(material_tag); + + if (cpos == -1) { + compute_code0 = code.ascii(); + } else { + compute_code0 = code.substr(0, cpos).ascii(); + code = code.substr(cpos + material_tag.length(), code.length()); + + cpos = code.find(globals_tag); + + if (cpos == -1) { + compute_code1 = code.ascii(); + } else { + compute_code1 = code.substr(0, cpos).ascii(); + String code2 = code.substr(cpos + globals_tag.length(), code.length()); + + cpos = code2.find(code_tag); + if (cpos == -1) { + compute_code2 = code2.ascii(); + } else { + compute_code2 = code2.substr(0, cpos).ascii(); + compute_code3 = code2.substr(cpos + code_tag.length(), code2.length()).ascii(); + } + } + } + } +} + +RID ShaderRD::version_create() { + //initialize() was never called + ERR_FAIL_COND_V(variant_defines.size() == 0, RID()); + + Version version; + version.dirty = true; + version.valid = false; + version.initialize_needed = true; + version.variants = nullptr; + return version_owner.make_rid(version); +} + +void ShaderRD::_clear_version(Version *p_version) { + //clear versions if they exist + if (p_version->variants) { + for (int i = 0; i < variant_defines.size(); i++) { + RD::get_singleton()->free(p_version->variants[i]); + } + + memdelete_arr(p_version->variants); + p_version->variants = nullptr; + } +} + +void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { + if (!variants_enabled[p_variant]) { + return; //variant is disabled, return + } + + Vector<RD::ShaderStageData> stages; + + String error; + String current_source; + RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; + bool build_ok = true; + + if (!is_compute) { + //vertex stage + + StringBuilder builder; + + builder.append(vertex_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(general_defines.get_data()); + builder.append(variant_defines[p_variant].get_data()); + + for (int j = 0; j < p_version->custom_defines.size(); j++) { + builder.append(p_version->custom_defines[j].get_data()); + } + + builder.append(vertex_code0.get_data()); //first part of vertex + + builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment) + + builder.append(vertex_code1.get_data()); //second part of vertex + + builder.append(p_version->vertex_globals.get_data()); // vertex globals + + builder.append(vertex_code2.get_data()); //third part of vertex + + builder.append(p_version->vertex_code.get_data()); // code + + builder.append(vertex_code3.get_data()); //fourth of vertex + + current_source = builder.as_string(); + RD::ShaderStageData stage; + stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spir_v.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_VERTEX; + stages.push_back(stage); + } + } + + if (!is_compute && build_ok) { + //fragment stage + current_stage = RD::SHADER_STAGE_FRAGMENT; + + StringBuilder builder; + + builder.append(fragment_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + + builder.append(general_defines.get_data()); + builder.append(variant_defines[p_variant].get_data()); + for (int j = 0; j < p_version->custom_defines.size(); j++) { + builder.append(p_version->custom_defines[j].get_data()); + } + + builder.append(fragment_code0.get_data()); //first part of fragment + + builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment) + + builder.append(fragment_code1.get_data()); //first part of fragment + + builder.append(p_version->fragment_globals.get_data()); // fragment globals + + builder.append(fragment_code2.get_data()); //third part of fragment + + builder.append(p_version->fragment_light.get_data()); // fragment light + + builder.append(fragment_code3.get_data()); //fourth part of fragment + + builder.append(p_version->fragment_code.get_data()); // fragment code + + builder.append(fragment_code4.get_data()); //fourth part of fragment + + current_source = builder.as_string(); + RD::ShaderStageData stage; + stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spir_v.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_FRAGMENT; + stages.push_back(stage); + } + } + + if (is_compute) { + //compute stage + current_stage = RD::SHADER_STAGE_COMPUTE; + + StringBuilder builder; + + builder.append(compute_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); + builder.append(general_defines.get_data()); + builder.append(variant_defines[p_variant].get_data()); + + for (int j = 0; j < p_version->custom_defines.size(); j++) { + builder.append(p_version->custom_defines[j].get_data()); + } + + builder.append(compute_code0.get_data()); //first part of compute + + builder.append(p_version->uniforms.get_data()); //uniforms (same for compute and fragment) + + builder.append(compute_code1.get_data()); //second part of compute + + builder.append(p_version->compute_globals.get_data()); // compute globals + + builder.append(compute_code2.get_data()); //third part of compute + + builder.append(p_version->compute_code.get_data()); // code + + builder.append(compute_code3.get_data()); //fourth of compute + + current_source = builder.as_string(); + RD::ShaderStageData stage; + stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_COMPUTE, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spir_v.size() == 0) { + build_ok = false; + } else { + stage.shader_stage = RD::SHADER_STAGE_COMPUTE; + stages.push_back(stage); + } + } + + if (!build_ok) { + MutexLock lock(variant_set_mutex); //properly print the errors + ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ")."); + ERR_PRINT(error); + +#ifdef DEBUG_ENABLED + ERR_PRINT("code:\n" + current_source.get_with_code_lines()); +#endif + return; + } + + RID shader = RD::get_singleton()->shader_create(stages); + { + MutexLock lock(variant_set_mutex); + p_version->variants[p_variant] = shader; + } +} + +RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_version) { + Version *version = version_owner.getornull(p_version); + RS::ShaderNativeSourceCode source_code; + ERR_FAIL_COND_V(!version, source_code); + + source_code.versions.resize(variant_defines.size()); + + for (int i = 0; i < source_code.versions.size(); i++) { + if (!is_compute) { + //vertex stage + + StringBuilder builder; + + builder.append(vertex_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(general_defines.get_data()); + builder.append(variant_defines[i].get_data()); + + for (int j = 0; j < version->custom_defines.size(); j++) { + builder.append(version->custom_defines[j].get_data()); + } + + builder.append(vertex_code0.get_data()); //first part of vertex + + builder.append(version->uniforms.get_data()); //uniforms (same for vertex and fragment) + + builder.append(vertex_code1.get_data()); //second part of vertex + + builder.append(version->vertex_globals.get_data()); // vertex globals + + builder.append(vertex_code2.get_data()); //third part of vertex + + builder.append(version->vertex_code.get_data()); // code + + builder.append(vertex_code3.get_data()); //fourth of vertex + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "vertex"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + + if (!is_compute) { + //fragment stage + + StringBuilder builder; + + builder.append(fragment_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(general_defines.get_data()); + builder.append(variant_defines[i].get_data()); + for (int j = 0; j < version->custom_defines.size(); j++) { + builder.append(version->custom_defines[j].get_data()); + } + + builder.append(fragment_code0.get_data()); //first part of fragment + + builder.append(version->uniforms.get_data()); //uniforms (same for fragment and fragment) + + builder.append(fragment_code1.get_data()); //first part of fragment + + builder.append(version->fragment_globals.get_data()); // fragment globals + + builder.append(fragment_code2.get_data()); //third part of fragment + + builder.append(version->fragment_light.get_data()); // fragment light + + builder.append(fragment_code3.get_data()); //fourth part of fragment + + builder.append(version->fragment_code.get_data()); // fragment code + + builder.append(fragment_code4.get_data()); //fourth part of fragment + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "fragment"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + + if (is_compute) { + //compute stage + + StringBuilder builder; + + builder.append(compute_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); + builder.append(general_defines.get_data()); + builder.append(variant_defines[i].get_data()); + + for (int j = 0; j < version->custom_defines.size(); j++) { + builder.append(version->custom_defines[j].get_data()); + } + + builder.append(compute_code0.get_data()); //first part of compute + + builder.append(version->uniforms.get_data()); //uniforms (same for compute and fragment) + + builder.append(compute_code1.get_data()); //second part of compute + + builder.append(version->compute_globals.get_data()); // compute globals + + builder.append(compute_code2.get_data()); //third part of compute + + builder.append(version->compute_code.get_data()); // code + + builder.append(compute_code3.get_data()); //fourth of compute + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "compute"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + } + + return source_code; +} + +void ShaderRD::_compile_version(Version *p_version) { + _clear_version(p_version); + + p_version->valid = false; + p_version->dirty = false; + + p_version->variants = memnew_arr(RID, variant_defines.size()); +#if 1 + + RendererThreadPool::singleton->thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version); +#else + for (int i = 0; i < variant_defines.size(); i++) { + _compile_variant(i, p_version); + } +#endif + + bool all_valid = true; + for (int i = 0; i < variant_defines.size(); i++) { + if (!variants_enabled[i]) { + continue; //disabled + } + if (p_version->variants[i].is_null()) { + all_valid = false; + break; + } + } + + if (!all_valid) { + //clear versions if they exist + for (int i = 0; i < variant_defines.size(); i++) { + if (!variants_enabled[i]) { + continue; //disabled + } + if (!p_version->variants[i].is_null()) { + RD::get_singleton()->free(p_version->variants[i]); + } + } + memdelete_arr(p_version->variants); + p_version->variants = nullptr; + return; + } + + p_version->valid = true; +} + +void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines) { + ERR_FAIL_COND(is_compute); + + Version *version = version_owner.getornull(p_version); + ERR_FAIL_COND(!version); + version->vertex_globals = p_vertex_globals.utf8(); + version->vertex_code = p_vertex_code.utf8(); + version->fragment_light = p_fragment_light.utf8(); + version->fragment_globals = p_fragment_globals.utf8(); + version->fragment_code = p_fragment_code.utf8(); + version->uniforms = p_uniforms.utf8(); + + version->custom_defines.clear(); + for (int i = 0; i < p_custom_defines.size(); i++) { + version->custom_defines.push_back(p_custom_defines[i].utf8()); + } + + version->dirty = true; + if (version->initialize_needed) { + _compile_version(version); + version->initialize_needed = false; + } +} + +void ShaderRD::version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines) { + ERR_FAIL_COND(!is_compute); + + Version *version = version_owner.getornull(p_version); + ERR_FAIL_COND(!version); + version->compute_globals = p_compute_globals.utf8(); + version->compute_code = p_compute_code.utf8(); + version->uniforms = p_uniforms.utf8(); + + version->custom_defines.clear(); + for (int i = 0; i < p_custom_defines.size(); i++) { + version->custom_defines.push_back(p_custom_defines[i].utf8()); + } + + version->dirty = true; + if (version->initialize_needed) { + _compile_version(version); + version->initialize_needed = false; + } +} + +bool ShaderRD::version_is_valid(RID p_version) { + Version *version = version_owner.getornull(p_version); + ERR_FAIL_COND_V(!version, false); + + if (version->dirty) { + _compile_version(version); + } + + return version->valid; +} + +bool ShaderRD::version_free(RID p_version) { + if (version_owner.owns(p_version)) { + Version *version = version_owner.getornull(p_version); + _clear_version(version); + version_owner.free(p_version); + } else { + return false; + } + + return true; +} + +void ShaderRD::set_variant_enabled(int p_variant, bool p_enabled) { + ERR_FAIL_COND(version_owner.get_rid_count() > 0); //versions exist + ERR_FAIL_INDEX(p_variant, variants_enabled.size()); + variants_enabled.write[p_variant] = p_enabled; +} + +bool ShaderRD::is_variant_enabled(int p_variant) const { + ERR_FAIL_INDEX_V(p_variant, variants_enabled.size(), false); + return variants_enabled[p_variant]; +} + +ShaderRD::ShaderRD() { + // Do not feel forced to use this, in most cases it makes little to no difference. + bool use_32_threads = false; + if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") { + use_32_threads = true; + } + String base_compute_define_text; + if (use_32_threads) { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 32\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 4\n"; + } else { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 64\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 8\n"; + } + + base_compute_defines = base_compute_define_text.ascii(); +} + +void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines) { + ERR_FAIL_COND(variant_defines.size()); + ERR_FAIL_COND(p_variant_defines.size() == 0); + + general_defines = p_general_defines.utf8(); + + for (int i = 0; i < p_variant_defines.size(); i++) { + variant_defines.push_back(p_variant_defines[i].utf8()); + variants_enabled.push_back(true); + } +} + +ShaderRD::~ShaderRD() { + List<RID> remaining; + version_owner.get_owned_list(&remaining); + if (remaining.size()) { + ERR_PRINT(itos(remaining.size()) + " shaders of type " + name + " were never freed"); + while (remaining.size()) { + version_free(remaining.front()->get()); + remaining.pop_front(); + } + } +} diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h new file mode 100644 index 0000000000..e0f4dcf2d0 --- /dev/null +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -0,0 +1,145 @@ +/*************************************************************************/ +/* shader_rd.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef SHADER_RD_H +#define SHADER_RD_H + +#include "core/os/mutex.h" +#include "core/templates/hash_map.h" +#include "core/templates/map.h" +#include "core/templates/rid_owner.h" +#include "core/variant/variant.h" +#include "servers/rendering_server.h" + +#include <stdio.h> +/** + @author Juan Linietsky <reduzio@gmail.com> +*/ + +class ShaderRD { + //versions + CharString general_defines; + Vector<CharString> variant_defines; + Vector<bool> variants_enabled; + + struct Version { + CharString uniforms; + CharString vertex_globals; + CharString vertex_code; + CharString compute_globals; + CharString compute_code; + CharString fragment_light; + CharString fragment_globals; + CharString fragment_code; + Vector<CharString> custom_defines; + + RID *variants; //same size as version defines + + bool valid; + bool dirty; + bool initialize_needed; + }; + + Mutex variant_set_mutex; + + void _compile_variant(uint32_t p_variant, Version *p_version); + + void _clear_version(Version *p_version); + void _compile_version(Version *p_version); + + RID_Owner<Version> version_owner; + + CharString fragment_codev; //for version and extensions + CharString fragment_code0; + CharString fragment_code1; + CharString fragment_code2; + CharString fragment_code3; + CharString fragment_code4; + + CharString vertex_codev; //for version and extensions + CharString vertex_code0; + CharString vertex_code1; + CharString vertex_code2; + CharString vertex_code3; + + bool is_compute = false; + + CharString compute_codev; //for version and extensions + CharString compute_code0; + CharString compute_code1; + CharString compute_code2; + CharString compute_code3; + + const char *name; + + CharString base_compute_defines; + +protected: + ShaderRD(); + void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); + +public: + RID version_create(); + + void version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines); + void version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines); + + _FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) { + ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID()); + ERR_FAIL_COND_V(!variants_enabled[p_variant], RID()); + + Version *version = version_owner.getornull(p_version); + ERR_FAIL_COND_V(!version, RID()); + + if (version->dirty) { + _compile_version(version); + } + + if (!version->valid) { + return RID(); + } + + return version->variants[p_variant]; + } + + bool version_is_valid(RID p_version); + + bool version_free(RID p_version); + + void set_variant_enabled(int p_variant, bool p_enabled); + bool is_variant_enabled(int p_variant) const; + + RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version); + + void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = ""); + virtual ~ShaderRD(); +}; + +#endif diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub new file mode 100644 index 0000000000..c192574ff2 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +Import("env") + +if "RD_GLSL" in env["BUILDERS"]: + env.RD_GLSL("canvas.glsl") + env.RD_GLSL("canvas_occlusion.glsl") + env.RD_GLSL("canvas_sdf.glsl") + env.RD_GLSL("copy.glsl") + env.RD_GLSL("copy_to_fb.glsl") + env.RD_GLSL("cubemap_roughness.glsl") + env.RD_GLSL("cubemap_downsampler.glsl") + env.RD_GLSL("cubemap_filter.glsl") + env.RD_GLSL("scene_forward.glsl") + env.RD_GLSL("sky.glsl") + env.RD_GLSL("tonemap.glsl") + env.RD_GLSL("cube_to_dp.glsl") + env.RD_GLSL("giprobe.glsl") + env.RD_GLSL("giprobe_debug.glsl") + env.RD_GLSL("giprobe_sdf.glsl") + env.RD_GLSL("luminance_reduce.glsl") + env.RD_GLSL("bokeh_dof.glsl") + env.RD_GLSL("ssao.glsl") + env.RD_GLSL("ssao_downsample.glsl") + env.RD_GLSL("ssao_importance_map.glsl") + env.RD_GLSL("ssao_blur.glsl") + env.RD_GLSL("ssao_interleave.glsl") + env.RD_GLSL("roughness_limiter.glsl") + env.RD_GLSL("screen_space_reflection.glsl") + env.RD_GLSL("screen_space_reflection_filter.glsl") + env.RD_GLSL("screen_space_reflection_scale.glsl") + env.RD_GLSL("subsurface_scattering.glsl") + env.RD_GLSL("specular_merge.glsl") + env.RD_GLSL("gi.glsl") + env.RD_GLSL("resolve.glsl") + env.RD_GLSL("sdfgi_preprocess.glsl") + env.RD_GLSL("sdfgi_integrate.glsl") + env.RD_GLSL("sdfgi_direct_light.glsl") + env.RD_GLSL("sdfgi_debug.glsl") + env.RD_GLSL("sdfgi_debug_probes.glsl") + env.RD_GLSL("volumetric_fog.glsl") + env.RD_GLSL("particles.glsl") + env.RD_GLSL("particles_copy.glsl") + env.RD_GLSL("sort.glsl") + env.RD_GLSL("skeleton.glsl") + env.RD_GLSL("cluster_render.glsl") + env.RD_GLSL("cluster_store.glsl") + env.RD_GLSL("cluster_debug.glsl") diff --git a/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl b/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl new file mode 100644 index 0000000000..63f086a83d --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl @@ -0,0 +1,251 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#define BLOCK_SIZE 8 + +layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; + +#ifdef MODE_GEN_BLUR_SIZE +layout(rgba16f, set = 0, binding = 0) uniform restrict image2D color_image; +layout(set = 1, binding = 0) uniform sampler2D source_depth; +#endif + +#if defined(MODE_BOKEH_BOX) || defined(MODE_BOKEH_HEXAGONAL) || defined(MODE_BOKEH_CIRCULAR) +layout(set = 1, binding = 0) uniform sampler2D color_texture; +layout(rgba16f, set = 0, binding = 0) uniform restrict writeonly image2D bokeh_image; +#endif + +#ifdef MODE_COMPOSITE_BOKEH +layout(rgba16f, set = 0, binding = 0) uniform restrict image2D color_image; +layout(set = 1, binding = 0) uniform sampler2D source_bokeh; +#endif + +// based on https://www.shadertoy.com/view/Xd3GDl + +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 size; + float z_far; + float z_near; + + bool orthogonal; + float blur_size; + float blur_scale; + int blur_steps; + + bool blur_near_active; + float blur_near_begin; + float blur_near_end; + bool blur_far_active; + + float blur_far_begin; + float blur_far_end; + bool second_pass; + bool half_size; + + bool use_jitter; + float jitter_seed; + uint pad[2]; +} +params; + +//used to work around downsampling filter +#define DEPTH_GAP 0.0 + +#ifdef MODE_GEN_BLUR_SIZE + +float get_depth_at_pos(vec2 uv) { + float depth = textureLod(source_depth, uv, 0.0).x; + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + return depth; +} + +float get_blur_size(float depth) { + if (params.blur_near_active && depth < params.blur_near_begin) { + return -(1.0 - smoothstep(params.blur_near_end, params.blur_near_begin, depth)) * params.blur_size - DEPTH_GAP; //near blur is negative + } + + if (params.blur_far_active && depth > params.blur_far_begin) { + return smoothstep(params.blur_far_begin, params.blur_far_end, depth) * params.blur_size + DEPTH_GAP; + } + + return 0.0; +} + +#endif + +const float GOLDEN_ANGLE = 2.39996323; + +//note: uniform pdf rand [0;1[ +float hash12n(vec2 p) { + p = fract(p * vec2(5.3987, 5.4421)); + p += dot(p.yx, p.xy + vec2(21.5351, 14.3137)); + return fract(p.x * p.y * 95.4307); +} + +#if defined(MODE_BOKEH_BOX) || defined(MODE_BOKEH_HEXAGONAL) + +vec4 weighted_filter_dir(vec2 dir, vec2 uv, vec2 pixel_size) { + dir *= pixel_size; + vec4 color = texture(color_texture, uv); + + vec4 accum = color; + float total = 1.0; + + float blur_scale = params.blur_size / float(params.blur_steps); + + if (params.use_jitter) { + uv += dir * (hash12n(uv + params.jitter_seed) - 0.5); + } + + for (int i = -params.blur_steps; i <= params.blur_steps; i++) { + if (i == 0) { + continue; + } + float radius = float(i) * blur_scale; + vec2 suv = uv + dir * radius; + radius = abs(radius); + + vec4 sample_color = texture(color_texture, suv); + float limit; + + if (sample_color.a < color.a) { + limit = abs(sample_color.a); + } else { + limit = abs(color.a); + } + + limit -= DEPTH_GAP; + + float m = smoothstep(radius - 0.5, radius + 0.5, limit); + + accum += mix(color, sample_color, m); + + total += 1.0; + } + + return accum / total; +} + +#endif + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThan(pos, params.size))) { //too large, do nothing + return; + } + + vec2 pixel_size = 1.0 / vec2(params.size); + vec2 uv = vec2(pos) / vec2(params.size); + +#ifdef MODE_GEN_BLUR_SIZE + uv += pixel_size * 0.5; + //precompute size in alpha channel + float depth = get_depth_at_pos(uv); + float size = get_blur_size(depth); + + vec4 color = imageLoad(color_image, pos); + color.a = size; + imageStore(color_image, pos, color); +#endif + +#ifdef MODE_BOKEH_BOX + + //pixel_size*=0.5; //resolution is doubled + if (params.second_pass || !params.half_size) { + uv += pixel_size * 0.5; //half pixel to read centers + } else { + uv += pixel_size * 0.25; //half pixel to read centers from full res + } + + vec2 dir = (params.second_pass ? vec2(0.0, 1.0) : vec2(1.0, 0.0)); + + vec4 color = weighted_filter_dir(dir, uv, pixel_size); + + imageStore(bokeh_image, pos, color); + +#endif + +#ifdef MODE_BOKEH_HEXAGONAL + + //pixel_size*=0.5; //resolution is doubled + if (params.second_pass || !params.half_size) { + uv += pixel_size * 0.5; //half pixel to read centers + } else { + uv += pixel_size * 0.25; //half pixel to read centers from full res + } + + vec2 dir = (params.second_pass ? normalize(vec2(1.0, 0.577350269189626)) : vec2(0.0, 1.0)); + + vec4 color = weighted_filter_dir(dir, uv, pixel_size); + + if (params.second_pass) { + dir = normalize(vec2(-1.0, 0.577350269189626)); + + vec4 color2 = weighted_filter_dir(dir, uv, pixel_size); + + color.rgb = min(color.rgb, color2.rgb); + color.a = (color.a + color2.a) * 0.5; + } + + imageStore(bokeh_image, pos, color); + +#endif + +#ifdef MODE_BOKEH_CIRCULAR + + if (params.half_size) { + pixel_size *= 0.5; //resolution is doubled + } + + uv += pixel_size * 0.5; //half pixel to read centers + + vec4 color = texture(color_texture, uv); + float accum = 1.0; + float radius = params.blur_scale; + + for (float ang = 0.0; radius < params.blur_size; ang += GOLDEN_ANGLE) { + vec2 suv = uv + vec2(cos(ang), sin(ang)) * pixel_size * radius; + vec4 sample_color = texture(color_texture, suv); + float sample_size = abs(sample_color.a); + if (sample_color.a > color.a) { + sample_size = clamp(sample_size, 0.0, abs(color.a) * 2.0); + } + + float m = smoothstep(radius - 0.5, radius + 0.5, sample_size); + color += mix(color / accum, sample_color, m); + accum += 1.0; + radius += params.blur_scale / radius; + } + + color /= accum; + + imageStore(bokeh_image, pos, color); +#endif + +#ifdef MODE_COMPOSITE_BOKEH + + uv += pixel_size * 0.5; + vec4 color = imageLoad(color_image, pos); + vec4 bokeh = texture(source_bokeh, uv); + + float mix_amount; + if (bokeh.a < color.a) { + mix_amount = min(1.0, max(0.0, max(abs(color.a), abs(bokeh.a)) - DEPTH_GAP)); + } else { + mix_amount = min(1.0, max(0.0, abs(color.a) - DEPTH_GAP)); + } + + color.rgb = mix(color.rgb, bokeh.rgb, mix_amount); //blend between hires and lowres + + color.a = 0; //reset alpha + imageStore(color_image, pos, color); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl new file mode 100644 index 0000000000..3b39edc70e --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/canvas.glsl @@ -0,0 +1,672 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +#ifdef USE_ATTRIBUTES +layout(location = 0) in vec2 vertex_attrib; +layout(location = 3) in vec4 color_attrib; +layout(location = 4) in vec2 uv_attrib; + +layout(location = 10) in uvec4 bone_attrib; +layout(location = 11) in vec4 weight_attrib; + +#endif + +#include "canvas_uniforms_inc.glsl" + +layout(location = 0) out vec2 uv_interp; +layout(location = 1) out vec4 color_interp; +layout(location = 2) out vec2 vertex_interp; + +#ifdef USE_NINEPATCH + +layout(location = 3) out vec2 pixel_size_interp; + +#endif + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = 1, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ +MATERIAL_UNIFORMS + /* clang-format on */ +} material; +#endif + +/* clang-format off */ +VERTEX_SHADER_GLOBALS +/* clang-format on */ + +void main() { + vec4 instance_custom = vec4(0.0); +#ifdef USE_PRIMITIVE + + //weird bug, + //this works + vec2 vertex; + vec2 uv; + vec4 color; + + if (gl_VertexIndex == 0) { + vertex = draw_data.points[0]; + uv = draw_data.uvs[0]; + color = vec4(unpackHalf2x16(draw_data.colors[0]), unpackHalf2x16(draw_data.colors[1])); + } else if (gl_VertexIndex == 1) { + vertex = draw_data.points[1]; + uv = draw_data.uvs[1]; + color = vec4(unpackHalf2x16(draw_data.colors[2]), unpackHalf2x16(draw_data.colors[3])); + } else { + vertex = draw_data.points[2]; + uv = draw_data.uvs[2]; + color = vec4(unpackHalf2x16(draw_data.colors[4]), unpackHalf2x16(draw_data.colors[5])); + } + uvec4 bones = uvec4(0, 0, 0, 0); + vec4 bone_weights = vec4(0.0); + +#elif defined(USE_ATTRIBUTES) + + vec2 vertex = vertex_attrib; + vec4 color = color_attrib; + vec2 uv = uv_attrib; + + uvec4 bones = bone_attrib; + vec4 bone_weights = weight_attrib; +#else + + vec2 vertex_base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + vec2 vertex_base = vertex_base_arr[gl_VertexIndex]; + + vec2 uv = draw_data.src_rect.xy + abs(draw_data.src_rect.zw) * ((draw_data.flags & FLAGS_TRANSPOSE_RECT) != 0 ? vertex_base.yx : vertex_base.xy); + vec4 color = draw_data.modulation; + vec2 vertex = draw_data.dst_rect.xy + abs(draw_data.dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data.src_rect.zw, vec2(0.0, 0.0))); + uvec4 bones = uvec4(0, 0, 0, 0); + +#endif + + mat4 world_matrix = mat4(vec4(draw_data.world_x, 0.0, 0.0), vec4(draw_data.world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data.world_ofs, 0.0, 1.0)); + +#if 0 + if (draw_data.flags & FLAGS_INSTANCING_ENABLED) { + uint offset = draw_data.flags & FLAGS_INSTANCING_STRIDE_MASK; + offset *= gl_InstanceIndex; + mat4 instance_xform = mat4( + vec4(texelFetch(instancing_buffer, offset + 0), texelFetch(instancing_buffer, offset + 1), 0.0, texelFetch(instancing_buffer, offset + 3)), + vec4(texelFetch(instancing_buffer, offset + 4), texelFetch(instancing_buffer, offset + 5), 0.0, texelFetch(instancing_buffer, offset + 7)), + vec4(0.0, 0.0, 1.0, 0.0), + vec4(0.0, 0.0, 0.0, 1.0)); + offset += 8; + if (draw_data.flags & FLAGS_INSTANCING_HAS_COLORS) { + vec4 instance_color; + if (draw_data.flags & FLAGS_INSTANCING_COLOR_8_BIT) { + uint bits = floatBitsToUint(texelFetch(instancing_buffer, offset)); + instance_color = unpackUnorm4x8(bits); + offset += 1; + } else { + instance_color = vec4(texelFetch(instancing_buffer, offset + 0), texelFetch(instancing_buffer, offset + 1), texelFetch(instancing_buffer, offset + 2), texelFetch(instancing_buffer, offset + 3)); + offset += 4; + } + + color *= instance_color; + } + if (draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA) { + if (draw_data.flags & FLAGS_INSTANCING_CUSTOM_DATA_8_BIT) { + uint bits = floatBitsToUint(texelFetch(instancing_buffer, offset)); + instance_custom = unpackUnorm4x8(bits); + } else { + instance_custom = vec4(texelFetch(instancing_buffer, offset + 0), texelFetch(instancing_buffer, offset + 1), texelFetch(instancing_buffer, offset + 2), texelFetch(instancing_buffer, offset + 3)); + } + } + } + +#endif + +#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) + if (bool(draw_data.flags & FLAGS_USING_PARTICLES)) { + //scale by texture size + vertex /= draw_data.color_texture_pixel_size; + } +#endif + +#ifdef USE_POINT_SIZE + float point_size = 1.0; +#endif + { + /* clang-format off */ +VERTEX_SHADER_CODE + /* clang-format on */ + } + +#ifdef USE_NINEPATCH + pixel_size_interp = abs(draw_data.dst_rect.zw) * vertex_base; +#endif + +#if !defined(SKIP_TRANSFORM_USED) + vertex = (world_matrix * vec4(vertex, 0.0, 1.0)).xy; +#endif + + color_interp = color; + + if (canvas_data.use_pixel_snap) { + vertex = floor(vertex + 0.5); + // precision issue on some hardware creates artifacts within texture + // offset uv by a small amount to avoid + uv += 1e-5; + } + +#ifdef USE_ATTRIBUTES +#if 0 + if (bool(draw_data.flags & FLAGS_USE_SKELETON) && bone_weights != vec4(0.0)) { //must be a valid bone + //skeleton transform + ivec4 bone_indicesi = ivec4(bone_indices); + + uvec2 tex_ofs = bone_indicesi.x * 2; + + mat2x4 m; + m = mat2x4( + texelFetch(skeleton_buffer, tex_ofs + 0), + texelFetch(skeleton_buffer, tex_ofs + 1)) * + bone_weights.x; + + tex_ofs = bone_indicesi.y * 2; + + m += mat2x4( + texelFetch(skeleton_buffer, tex_ofs + 0), + texelFetch(skeleton_buffer, tex_ofs + 1)) * + bone_weights.y; + + tex_ofs = bone_indicesi.z * 2; + + m += mat2x4( + texelFetch(skeleton_buffer, tex_ofs + 0), + texelFetch(skeleton_buffer, tex_ofs + 1)) * + bone_weights.z; + + tex_ofs = bone_indicesi.w * 2; + + m += mat2x4( + texelFetch(skeleton_buffer, tex_ofs + 0), + texelFetch(skeleton_buffer, tex_ofs + 1)) * + bone_weights.w; + + mat4 bone_matrix = skeleton_data.skeleton_transform * transpose(mat4(m[0], m[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))) * skeleton_data.skeleton_transform_inverse; + + //outvec = bone_matrix * outvec; + } +#endif +#endif + + vertex = (canvas_data.canvas_transform * vec4(vertex, 0.0, 1.0)).xy; + + vertex_interp = vertex; + uv_interp = uv; + + gl_Position = canvas_data.screen_transform * vec4(vertex, 0.0, 1.0); + +#ifdef USE_POINT_SIZE + gl_PointSize = point_size; +#endif +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +#include "canvas_uniforms_inc.glsl" + +layout(location = 0) in vec2 uv_interp; +layout(location = 1) in vec4 color_interp; +layout(location = 2) in vec2 vertex_interp; + +#ifdef USE_NINEPATCH + +layout(location = 3) in vec2 pixel_size_interp; + +#endif + +layout(location = 0) out vec4 frag_color; + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = 1, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ +MATERIAL_UNIFORMS + /* clang-format on */ +} material; +#endif + +vec2 screen_uv_to_sdf(vec2 p_uv) { + return canvas_data.screen_to_sdf * p_uv; +} + +float texture_sdf(vec2 p_sdf) { + vec2 uv = p_sdf * canvas_data.sdf_to_tex.xy + canvas_data.sdf_to_tex.zw; + float d = texture(sampler2D(sdf_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uv).r; + d = d * SDF_MAX_LENGTH - 1.0; + return d * canvas_data.tex_to_sdf; +} + +vec2 texture_sdf_normal(vec2 p_sdf) { + vec2 uv = p_sdf * canvas_data.sdf_to_tex.xy + canvas_data.sdf_to_tex.zw; + + const float EPSILON = 0.001; + return normalize(vec2( + texture(sampler2D(sdf_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uv + vec2(EPSILON, 0.0)).r - texture(sampler2D(sdf_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uv - vec2(EPSILON, 0.0)).r, + texture(sampler2D(sdf_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uv + vec2(0.0, EPSILON)).r - texture(sampler2D(sdf_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uv - vec2(0.0, EPSILON)).r)); +} + +vec2 sdf_to_screen_uv(vec2 p_sdf) { + return p_sdf * canvas_data.sdf_to_screen; +} + +/* clang-format off */ +FRAGMENT_SHADER_GLOBALS +/* clang-format on */ + +#ifdef LIGHT_SHADER_CODE_USED + +vec4 light_compute( + vec3 light_vertex, + vec3 light_position, + vec3 normal, + vec4 light_color, + float light_energy, + vec4 specular_shininess, + inout vec4 shadow_modulate, + vec2 screen_uv, + vec2 uv, + vec4 color, bool is_directional) { + vec4 light = vec4(0.0); + /* clang-format off */ +LIGHT_SHADER_CODE + /* clang-format on */ + return light; +} + +#endif + +#ifdef USE_NINEPATCH + +float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, float margin_begin, float margin_end, int np_repeat, inout int draw_center) { + float tex_size = 1.0 / tex_pixel_size; + + if (pixel < margin_begin) { + return pixel * tex_pixel_size; + } else if (pixel >= draw_size - margin_end) { + return (tex_size - (draw_size - pixel)) * tex_pixel_size; + } else { + if (!bool(draw_data.flags & FLAGS_NINEPACH_DRAW_CENTER)) { + draw_center--; + } + + // np_repeat is passed as uniform using NinePatchRect::AxisStretchMode enum. + if (np_repeat == 0) { // Stretch. + // Convert to ratio. + float ratio = (pixel - margin_begin) / (draw_size - margin_begin - margin_end); + // Scale to source texture. + return (margin_begin + ratio * (tex_size - margin_begin - margin_end)) * tex_pixel_size; + } else if (np_repeat == 1) { // Tile. + // Convert to offset. + float ofs = mod((pixel - margin_begin), tex_size - margin_begin - margin_end); + // Scale to source texture. + return (margin_begin + ofs) * tex_pixel_size; + } else if (np_repeat == 2) { // Tile Fit. + // Calculate scale. + float src_area = draw_size - margin_begin - margin_end; + float dst_area = tex_size - margin_begin - margin_end; + float scale = max(1.0, floor(src_area / max(dst_area, 0.0000001) + 0.5)); + // Convert to ratio. + float ratio = (pixel - margin_begin) / src_area; + ratio = mod(ratio * scale, 1.0); + // Scale to source texture. + return (margin_begin + ratio * dst_area) * tex_pixel_size; + } else { // Shouldn't happen, but silences compiler warning. + return 0.0; + } + } +} + +#endif + +#ifdef USE_LIGHTING + +vec3 light_normal_compute(vec3 light_vec, vec3 normal, vec3 base_color, vec3 light_color, vec4 specular_shininess, bool specular_shininess_used) { + float cNdotL = max(0.0, dot(normal, light_vec)); + + if (specular_shininess_used) { + //blinn + vec3 view = vec3(0.0, 0.0, 1.0); // not great but good enough + vec3 half_vec = normalize(view + light_vec); + + float cNdotV = max(dot(normal, view), 0.0); + float cNdotH = max(dot(normal, half_vec), 0.0); + float cVdotH = max(dot(view, half_vec), 0.0); + float cLdotH = max(dot(light_vec, half_vec), 0.0); + float shininess = exp2(15.0 * specular_shininess.a + 1.0) * 0.25; + float blinn = pow(cNdotH, shininess); + blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); + float s = (blinn) / max(4.0 * cNdotV * cNdotL, 0.75); + + return specular_shininess.rgb * light_color * s + light_color * base_color * cNdotL; + } else { + return light_color * base_color * cNdotL; + } +} + +//float distance = length(shadow_pos); +vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv +#ifdef LIGHT_SHADER_CODE_USED + , + vec3 shadow_modulate +#endif +) { + float shadow; + uint shadow_mode = light_array.data[light_base].flags & LIGHT_FLAGS_FILTER_MASK; + + if (shadow_mode == LIGHT_FLAGS_SHADOW_NEAREST) { + shadow = textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv, 0.0).x; + } else if (shadow_mode == LIGHT_FLAGS_SHADOW_PCF5) { + vec4 shadow_pixel_size = vec4(light_array.data[light_base].shadow_pixel_size, 0.0, 0.0, 0.0); + shadow = 0.0; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size * 2.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size * 2.0, 0.0).x; + shadow /= 5.0; + } else { //PCF13 + vec4 shadow_pixel_size = vec4(light_array.data[light_base].shadow_pixel_size, 0.0, 0.0, 0.0); + shadow = 0.0; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size * 6.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size * 5.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size * 4.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size * 3.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size * 2.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv - shadow_pixel_size, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size * 2.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size * 3.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size * 4.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size * 5.0, 0.0).x; + shadow += textureProjLod(sampler2DShadow(shadow_atlas_texture, shadow_sampler), shadow_uv + shadow_pixel_size * 6.0, 0.0).x; + shadow /= 13.0; + } + + vec4 shadow_color = unpackUnorm4x8(light_array.data[light_base].shadow_color); +#ifdef LIGHT_SHADER_CODE_USED + shadow_color.rgb *= shadow_modulate; +#endif + + shadow_color.a *= light_color.a; //respect light alpha + + return mix(light_color, shadow_color, shadow); +} + +void light_blend_compute(uint light_base, vec4 light_color, inout vec3 color) { + uint blend_mode = light_array.data[light_base].flags & LIGHT_FLAGS_BLEND_MASK; + + switch (blend_mode) { + case LIGHT_FLAGS_BLEND_MODE_ADD: { + color.rgb += light_color.rgb * light_color.a; + } break; + case LIGHT_FLAGS_BLEND_MODE_SUB: { + color.rgb -= light_color.rgb * light_color.a; + } break; + case LIGHT_FLAGS_BLEND_MODE_MIX: { + color.rgb = mix(color.rgb, light_color.rgb, light_color.a); + } break; + } +} + +#endif + +void main() { + vec4 color = color_interp; + vec2 uv = uv_interp; + vec2 vertex = vertex_interp; + +#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) + +#ifdef USE_NINEPATCH + + int draw_center = 2; + uv = vec2( + map_ninepatch_axis(pixel_size_interp.x, abs(draw_data.dst_rect.z), draw_data.color_texture_pixel_size.x, draw_data.ninepatch_margins.x, draw_data.ninepatch_margins.z, int(draw_data.flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center), + map_ninepatch_axis(pixel_size_interp.y, abs(draw_data.dst_rect.w), draw_data.color_texture_pixel_size.y, draw_data.ninepatch_margins.y, draw_data.ninepatch_margins.w, int(draw_data.flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center)); + + if (draw_center == 0) { + color.a = 0.0; + } + + uv = uv * draw_data.src_rect.zw + draw_data.src_rect.xy; //apply region if needed + +#endif + if (bool(draw_data.flags & FLAGS_CLIP_RECT_UV)) { + uv = clamp(uv, draw_data.src_rect.xy, draw_data.src_rect.xy + abs(draw_data.src_rect.zw)); + } + +#endif + + color *= texture(sampler2D(color_texture, texture_sampler), uv); + + uint light_count = (draw_data.flags >> FLAGS_LIGHT_COUNT_SHIFT) & 0xF; //max 16 lights + bool using_light = light_count > 0 || canvas_data.directional_light_count > 0; + + vec3 normal; + +#if defined(NORMAL_USED) + bool normal_used = true; +#else + bool normal_used = false; +#endif + + if (normal_used || (using_light && bool(draw_data.flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) { + normal.xy = texture(sampler2D(normal_texture, texture_sampler), uv).xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); + normal.z = sqrt(1.0 - dot(normal.xy, normal.xy)); + normal_used = true; + } else { + normal = vec3(0.0, 0.0, 1.0); + } + + vec4 specular_shininess; + +#if defined(SPECULAR_SHININESS_USED) + + bool specular_shininess_used = true; +#else + bool specular_shininess_used = false; +#endif + + if (specular_shininess_used || (using_light && normal_used && bool(draw_data.flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) { + specular_shininess = texture(sampler2D(specular_texture, texture_sampler), uv); + specular_shininess *= unpackUnorm4x8(draw_data.specular_shininess); + specular_shininess_used = true; + } else { + specular_shininess = vec4(1.0); + } + +#if defined(SCREEN_UV_USED) + vec2 screen_uv = gl_FragCoord.xy * canvas_data.screen_pixel_size; +#else + vec2 screen_uv = vec2(0.0); +#endif + + vec3 light_vertex = vec3(vertex, 0.0); + vec2 shadow_vertex = vertex; + + { + float normal_map_depth = 1.0; + +#if defined(NORMAL_MAP_USED) + vec3 normal_map = vec3(0.0, 0.0, 1.0); + normal_used = true; +#endif + + /* clang-format off */ + +FRAGMENT_SHADER_CODE + + /* clang-format on */ + +#if defined(NORMAL_MAP_USED) + normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_map_depth); +#endif + } + + if (normal_used) { + //convert by item transform + normal.xy = mat2(normalize(draw_data.world_x), normalize(draw_data.world_y)) * normal.xy; + //convert by canvas transform + normal = normalize((canvas_data.canvas_normal_transform * vec4(normal, 0.0)).xyz); + } + + vec3 base_color = color.rgb; + if (bool(draw_data.flags & FLAGS_USING_LIGHT_MASK)) { + color = vec4(0.0); //invisible by default due to using light mask + } + +#ifdef MODE_LIGHT_ONLY + color = vec4(0.0); +#else + color *= canvas_data.canvas_modulation; +#endif + +#if defined(USE_LIGHTING) && !defined(MODE_UNSHADED) + + // Directional Lights + + for (uint i = 0; i < canvas_data.directional_light_count; i++) { + uint light_base = i; + + vec2 direction = light_array.data[light_base].position; + vec4 light_color = light_array.data[light_base].color; + +#ifdef LIGHT_SHADER_CODE_USED + + vec4 shadow_modulate = vec4(1.0); + light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, true); +#else + + if (normal_used) { + vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array.data[light_base].height)); + light_color.rgb = light_normal_compute(light_vec, normal, base_color, light_color.rgb, specular_shininess, specular_shininess_used); + } +#endif + + if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { + vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + + vec4 shadow_uv = vec4(shadow_pos.x, light_array.data[light_base].shadow_y_ofs, shadow_pos.y * light_array.data[light_base].shadow_zfar_inv, 1.0); + + light_color = light_shadow_compute(light_base, light_color, shadow_uv +#ifdef LIGHT_SHADER_CODE_USED + , + shadow_modulate.rgb +#endif + ); + } + + light_blend_compute(light_base, light_color, color.rgb); + } + + // Positional Lights + + for (uint i = 0; i < MAX_LIGHTS_PER_ITEM; i++) { + if (i >= light_count) { + break; + } + uint light_base; + if (i < 8) { + if (i < 4) { + light_base = draw_data.lights[0]; + } else { + light_base = draw_data.lights[1]; + } + } else { + if (i < 12) { + light_base = draw_data.lights[2]; + } else { + light_base = draw_data.lights[3]; + } + } + light_base >>= (i & 3) * 8; + light_base &= 0xFF; + + vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array.data[light_base].texture_matrix[0], light_array.data[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + vec2 tex_uv_atlas = tex_uv * light_array.data[light_base].atlas_rect.zw + light_array.data[light_base].atlas_rect.xy; + vec4 light_color = textureLod(sampler2D(atlas_texture, texture_sampler), tex_uv_atlas, 0.0); + vec4 light_base_color = light_array.data[light_base].color; + +#ifdef LIGHT_SHADER_CODE_USED + + vec4 shadow_modulate = vec4(1.0); + vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height); + + light_color.rgb *= light_base_color.rgb; + light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, false); +#else + + light_color.rgb *= light_base_color.rgb * light_base_color.a; + + if (normal_used) { + vec3 light_pos = vec3(light_array.data[light_base].position, light_array.data[light_base].height); + vec3 pos = light_vertex; + vec3 light_vec = normalize(light_pos - pos); + float cNdotL = max(0.0, dot(normal, light_vec)); + + light_color.rgb = light_normal_compute(light_vec, normal, base_color, light_color.rgb, specular_shininess, specular_shininess_used); + } +#endif + if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) { + //if outside the light texture, light color is zero + light_color.a = 0.0; + } + + if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { + vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + + vec2 pos_norm = normalize(shadow_pos); + vec2 pos_abs = abs(pos_norm); + vec2 pos_box = pos_norm / max(pos_abs.x, pos_abs.y); + vec2 pos_rot = pos_norm * mat2(vec2(0.7071067811865476, -0.7071067811865476), vec2(0.7071067811865476, 0.7071067811865476)); //is there a faster way to 45 degrees rot? + float tex_ofs; + float distance; + if (pos_rot.y > 0) { + if (pos_rot.x > 0) { + tex_ofs = pos_box.y * 0.125 + 0.125; + distance = shadow_pos.x; + } else { + tex_ofs = pos_box.x * -0.125 + (0.25 + 0.125); + distance = shadow_pos.y; + } + } else { + if (pos_rot.x < 0) { + tex_ofs = pos_box.y * -0.125 + (0.5 + 0.125); + distance = -shadow_pos.x; + } else { + tex_ofs = pos_box.x * 0.125 + (0.75 + 0.125); + distance = -shadow_pos.y; + } + } + + distance *= light_array.data[light_base].shadow_zfar_inv; + + //float distance = length(shadow_pos); + vec4 shadow_uv = vec4(tex_ofs, light_array.data[light_base].shadow_y_ofs, distance, 1.0); + + light_color = light_shadow_compute(light_base, light_color, shadow_uv +#ifdef LIGHT_SHADER_CODE_USED + , + shadow_modulate.rgb +#endif + ); + } + + light_blend_compute(light_base, light_color, color.rgb); + } +#endif + + frag_color = color; +} diff --git a/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl b/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl new file mode 100644 index 0000000000..5c25235c58 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl @@ -0,0 +1,59 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in highp vec3 vertex; + +layout(push_constant, binding = 0, std430) uniform Constants { + mat4 projection; + mat2x4 modelview; + vec2 direction; + float z_far; + float pad; +} +constants; + +#ifdef MODE_SHADOW +layout(location = 0) out highp float depth; +#endif + +void main() { + highp vec4 vtx = vec4(vertex, 1.0) * mat4(constants.modelview[0], constants.modelview[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + +#ifdef MODE_SHADOW + depth = dot(constants.direction, vtx.xy); +#endif + gl_Position = constants.projection * vtx; +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(push_constant, binding = 0, std430) uniform Constants { + mat4 projection; + mat2x4 modelview; + vec2 direction; + float z_far; + float pad; +} +constants; + +#ifdef MODE_SHADOW +layout(location = 0) in highp float depth; +layout(location = 0) out highp float distance_buf; +#else +layout(location = 0) out highp float sdf_buf; +#endif + +void main() { +#ifdef MODE_SHADOW + distance_buf = depth / constants.z_far; +#else + sdf_buf = 1.0; +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl b/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl new file mode 100644 index 0000000000..302ad03b41 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl @@ -0,0 +1,135 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(r8, set = 0, binding = 1) uniform restrict readonly image2D src_pixels; +layout(r16, set = 0, binding = 2) uniform restrict writeonly image2D dst_sdf; + +layout(rg16i, set = 0, binding = 3) uniform restrict readonly iimage2D src_process; +layout(rg16i, set = 0, binding = 4) uniform restrict writeonly iimage2D dst_process; + +layout(push_constant, binding = 0, std430) uniform Params { + ivec2 size; + int stride; + int shift; + ivec2 base_size; + uvec2 pad; +} +params; + +#define SDF_MAX_LENGTH 16384.0 + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(pos, params.size))) { //too large, do nothing + return; + } + +#ifdef MODE_LOAD + + bool solid = imageLoad(src_pixels, pos).r > 0.5; + imageStore(dst_process, pos, solid ? ivec4(pos, 0, 0) : ivec4(ivec2(32767), 0, 0)); +#endif + +#ifdef MODE_LOAD_SHRINK + + int s = 1 << params.shift; + ivec2 base = pos << params.shift; + ivec2 center = base + ivec2(params.shift); + + ivec2 rel = ivec2(32767); + float d = 1e20; + for (int i = 0; i < s; i++) { + for (int j = 0; j < s; j++) { + ivec2 src_pos = base + ivec2(i, j); + if (any(greaterThanEqual(src_pos, params.base_size))) { + continue; + } + bool solid = imageLoad(src_pixels, src_pos).r > 0.5; + if (solid) { + float dist = length(vec2(src_pos - center)); + if (dist < d) { + d = dist; + rel = src_pos; + } + } + } + } + + imageStore(dst_process, pos, ivec4(rel, 0, 0)); +#endif + +#ifdef MODE_PROCESS + + ivec2 base = pos << params.shift; + ivec2 center = base + ivec2(params.shift); + + ivec2 rel = imageLoad(src_process, pos).xy; + + if (center != rel) { + //only process if it does not point to itself + const int ofs_table_size = 8; + const ivec2 ofs_table[ofs_table_size] = ivec2[]( + ivec2(-1, -1), + ivec2(0, -1), + ivec2(+1, -1), + + ivec2(-1, 0), + ivec2(+1, 0), + + ivec2(-1, +1), + ivec2(0, +1), + ivec2(+1, +1)); + + float dist = length(vec2(rel - center)); + for (int i = 0; i < ofs_table_size; i++) { + ivec2 src_pos = pos + ofs_table[i] * params.stride; + if (any(lessThan(src_pos, ivec2(0))) || any(greaterThanEqual(src_pos, params.size))) { + continue; + } + ivec2 src_rel = imageLoad(src_process, src_pos).xy; + float src_dist = length(vec2(src_rel - center)); + if (src_dist < dist) { + dist = src_dist; + rel = src_rel; + } + } + } + + imageStore(dst_process, pos, ivec4(rel, 0, 0)); +#endif + +#ifdef MODE_STORE + + ivec2 rel = imageLoad(src_process, pos).xy; + float d = length(vec2(rel - pos)); + if (d > 0.01) { + d += 1.0; //make it signed + } + d /= SDF_MAX_LENGTH; + d = clamp(d, 0.0, 1.0); + imageStore(dst_sdf, pos, vec4(d)); + +#endif + +#ifdef MODE_STORE_SHRINK + + ivec2 base = pos << params.shift; + ivec2 center = base + ivec2(params.shift); + + ivec2 rel = imageLoad(src_process, pos).xy; + float d = length(vec2(rel - center)); + + if (d > 0.01) { + d += 1.0; //make it signed + } + d /= SDF_MAX_LENGTH; + d = clamp(d, 0.0, 1.0); + imageStore(dst_sdf, pos, vec4(d)); + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl new file mode 100644 index 0000000000..cf7678ea31 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl @@ -0,0 +1,162 @@ + +#define MAX_LIGHTS_PER_ITEM 16 + +#define M_PI 3.14159265359 + +#define SDF_MAX_LENGTH 16384.0 + +#define FLAGS_INSTANCING_STRIDE_MASK 0xF +#define FLAGS_INSTANCING_ENABLED (1 << 4) +#define FLAGS_INSTANCING_HAS_COLORS (1 << 5) +#define FLAGS_INSTANCING_COLOR_8BIT (1 << 6) +#define FLAGS_INSTANCING_HAS_CUSTOM_DATA (1 << 7) +#define FLAGS_INSTANCING_CUSTOM_DATA_8_BIT (1 << 8) + +#define FLAGS_CLIP_RECT_UV (1 << 9) +#define FLAGS_TRANSPOSE_RECT (1 << 10) +#define FLAGS_USING_LIGHT_MASK (1 << 11) +#define FLAGS_NINEPACH_DRAW_CENTER (1 << 12) +#define FLAGS_USING_PARTICLES (1 << 13) + +#define FLAGS_NINEPATCH_H_MODE_SHIFT 16 +#define FLAGS_NINEPATCH_V_MODE_SHIFT 18 + +#define FLAGS_LIGHT_COUNT_SHIFT 20 + +#define FLAGS_DEFAULT_NORMAL_MAP_USED (1 << 26) +#define FLAGS_DEFAULT_SPECULAR_MAP_USED (1 << 27) + +#define SAMPLER_NEAREST_CLAMP 0 +#define SAMPLER_LINEAR_CLAMP 1 +#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2 +#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5 +#define SAMPLER_NEAREST_REPEAT 6 +#define SAMPLER_LINEAR_REPEAT 7 +#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8 +#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 + +// Push Constant + +layout(push_constant, binding = 0, std430) uniform DrawData { + vec2 world_x; + vec2 world_y; + vec2 world_ofs; + uint flags; + uint specular_shininess; +#ifdef USE_PRIMITIVE + vec2 points[3]; + vec2 uvs[3]; + uint colors[6]; +#else + vec4 modulation; + vec4 ninepatch_margins; + vec4 dst_rect; //for built-in rect and UV + vec4 src_rect; + vec2 pad; + +#endif + vec2 color_texture_pixel_size; + uint lights[4]; +} +draw_data; + +// In vulkan, sets should always be ordered using the following logic: +// Lower Sets: Sets that change format and layout less often +// Higher sets: Sets that change format and layout very often +// This is because changing a set for another with a different layout or format, +// invalidates all the upper ones (as likely internal base offset changes) + +/* SET0: Globals */ + +// The values passed per draw primitives are cached within it + +layout(set = 0, binding = 1, std140) uniform CanvasData { + mat4 canvas_transform; + mat4 screen_transform; + mat4 canvas_normal_transform; + vec4 canvas_modulation; + vec2 screen_pixel_size; + float time; + bool use_pixel_snap; + + vec4 sdf_to_tex; + vec2 screen_to_sdf; + vec2 sdf_to_screen; + + uint directional_light_count; + float tex_to_sdf; + uint pad1; + uint pad2; +} +canvas_data; + +#define LIGHT_FLAGS_BLEND_MASK (3 << 16) +#define LIGHT_FLAGS_BLEND_MODE_ADD (0 << 16) +#define LIGHT_FLAGS_BLEND_MODE_SUB (1 << 16) +#define LIGHT_FLAGS_BLEND_MODE_MIX (2 << 16) +#define LIGHT_FLAGS_BLEND_MODE_MASK (3 << 16) +#define LIGHT_FLAGS_HAS_SHADOW (1 << 20) +#define LIGHT_FLAGS_FILTER_SHIFT 22 +#define LIGHT_FLAGS_FILTER_MASK (3 << 22) +#define LIGHT_FLAGS_SHADOW_NEAREST (0 << 22) +#define LIGHT_FLAGS_SHADOW_PCF5 (1 << 22) +#define LIGHT_FLAGS_SHADOW_PCF13 (2 << 22) + +struct Light { + mat2x4 texture_matrix; //light to texture coordinate matrix (transposed) + mat2x4 shadow_matrix; //light to shadow coordinate matrix (transposed) + vec4 color; + + uint shadow_color; // packed + uint flags; //index to light texture + float shadow_pixel_size; + float height; + + vec2 position; + float shadow_zfar_inv; + float shadow_y_ofs; + + vec4 atlas_rect; +}; + +layout(set = 0, binding = 2, std140) uniform LightData { + Light data[MAX_LIGHTS]; +} +light_array; + +layout(set = 0, binding = 3) uniform texture2D atlas_texture; +layout(set = 0, binding = 4) uniform texture2D shadow_atlas_texture; + +layout(set = 0, binding = 5) uniform sampler shadow_sampler; + +layout(set = 0, binding = 6) uniform texture2D screen_texture; +layout(set = 0, binding = 7) uniform texture2D sdf_texture; + +layout(set = 0, binding = 8) uniform sampler material_samplers[12]; + +layout(set = 0, binding = 9, std430) restrict readonly buffer GlobalVariableData { + vec4 data[]; +} +global_variables; + +/* SET1: Is reserved for the material */ + +// + +/* SET2: Instancing and Skeleton */ + +layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { + vec4 data[]; +} +transforms; + +/* SET3: Texture */ + +layout(set = 3, binding = 0) uniform texture2D color_texture; +layout(set = 3, binding = 1) uniform texture2D normal_texture; +layout(set = 3, binding = 2) uniform texture2D specular_texture; +layout(set = 3, binding = 3) uniform sampler texture_sampler; diff --git a/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl new file mode 100644 index 0000000000..3a4bf4da07 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl @@ -0,0 +1,105 @@ + +#define CLUSTER_COUNTER_SHIFT 20 +#define CLUSTER_POINTER_MASK ((1 << CLUSTER_COUNTER_SHIFT) - 1) +#define CLUSTER_COUNTER_MASK 0xfff + +struct LightData { //this structure needs to be as packed as possible + vec3 position; + float inv_radius; + + vec3 direction; + float size; + + vec3 color; + float attenuation; + + float cone_attenuation; + float cone_angle; + float specular_amount; + bool shadow_enabled; + + vec4 atlas_rect; // rect in the shadow atlas + mat4 shadow_matrix; + float shadow_bias; + float shadow_normal_bias; + float transmittance_bias; + float soft_shadow_size; // for spot, it's the size in uv coordinates of the light, for omni it's the span angle + float soft_shadow_scale; // scales the shadow kernel for blurrier shadows + uint mask; + float shadow_volumetric_fog_fade; + uint pad; + vec4 projector_rect; //projector rect in srgb decal atlas +}; + +#define REFLECTION_AMBIENT_DISABLED 0 +#define REFLECTION_AMBIENT_ENVIRONMENT 1 +#define REFLECTION_AMBIENT_COLOR 2 + +struct ReflectionData { + vec3 box_extents; + float index; + vec3 box_offset; + uint mask; + vec3 ambient; // ambient color + float intensity; + bool exterior; + bool box_project; + uint ambient_mode; + uint pad; + //0-8 is intensity,8-9 is ambient, mode + mat4 local_matrix; // up to here for spot and omni, rest is for directional + // notes: for ambientblend, use distance to edge to blend between already existing global environment +}; + +struct DirectionalLightData { + vec3 direction; + float energy; + vec3 color; + float size; + float specular; + uint mask; + float softshadow_angle; + float soft_shadow_scale; + bool blend_splits; + bool shadow_enabled; + float fade_from; + float fade_to; + uvec3 pad; + float shadow_volumetric_fog_fade; + vec4 shadow_bias; + vec4 shadow_normal_bias; + vec4 shadow_transmittance_bias; + vec4 shadow_z_range; + vec4 shadow_range_begin; + vec4 shadow_split_offsets; + mat4 shadow_matrix1; + mat4 shadow_matrix2; + mat4 shadow_matrix3; + mat4 shadow_matrix4; + vec4 shadow_color1; + vec4 shadow_color2; + vec4 shadow_color3; + vec4 shadow_color4; + vec2 uv_scale1; + vec2 uv_scale2; + vec2 uv_scale3; + vec2 uv_scale4; +}; + +struct DecalData { + mat4 xform; //to decal transform + vec3 inv_extents; + float albedo_mix; + vec4 albedo_rect; + vec4 normal_rect; + vec4 orm_rect; + vec4 emission_rect; + vec4 modulate; + float emission_energy; + uint mask; + float upper_fade; + float lower_fade; + mat3x4 normal_xform; + vec3 normal; + float normal_fade; +}; diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl new file mode 100644 index 0000000000..70a875192c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl @@ -0,0 +1,115 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32 + vec3(0.14, 0.17, 0.23), + vec3(0.24, 0.44, 0.83), + vec3(0.23, 0.57, 0.84), + vec3(0.22, 0.71, 0.84), + vec3(0.22, 0.85, 0.83), + vec3(0.21, 0.85, 0.72), + vec3(0.21, 0.85, 0.57), + vec3(0.20, 0.85, 0.42), + vec3(0.20, 0.85, 0.27), + vec3(0.27, 0.86, 0.19), + vec3(0.51, 0.85, 0.19), + vec3(0.57, 0.86, 0.19), + vec3(0.62, 0.85, 0.19), + vec3(0.67, 0.86, 0.20), + vec3(0.73, 0.85, 0.20), + vec3(0.78, 0.85, 0.20), + vec3(0.83, 0.85, 0.20), + vec3(0.85, 0.82, 0.20), + vec3(0.85, 0.76, 0.20), + vec3(0.85, 0.81, 0.20), + vec3(0.85, 0.65, 0.20), + vec3(0.84, 0.60, 0.21), + vec3(0.84, 0.56, 0.21), + vec3(0.84, 0.51, 0.21), + vec3(0.84, 0.46, 0.21), + vec3(0.84, 0.41, 0.21), + vec3(0.84, 0.36, 0.21), + vec3(0.84, 0.31, 0.21), + vec3(0.84, 0.27, 0.21), + vec3(0.83, 0.22, 0.22), + vec3(0.83, 0.22, 0.27), + vec3(0.83, 0.22, 0.32), + vec3(1.00, 0.63, 0.70)); +layout(push_constant, binding = 0, std430) uniform Params { + uvec2 screen_size; + uvec2 cluster_screen_size; + + uint cluster_shift; + uint cluster_type; + float z_near; + float z_far; + + bool orthogonal; + uint max_cluster_element_count_div_32; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterData { + uint data[]; +} +cluster_data; + +layout(rgba16f, set = 0, binding = 2) uniform restrict writeonly image2D screen_buffer; +layout(set = 0, binding = 3) uniform texture2D depth_buffer; +layout(set = 0, binding = 4) uniform sampler depth_buffer_sampler; + +void main() { + uvec2 screen_pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(screen_pos, params.screen_size))) { + return; + } + + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + + uint offset = cluster_pos.y * params.cluster_screen_size.x + cluster_pos.x; + offset += params.cluster_screen_size.x * params.cluster_screen_size.y * params.cluster_type; + offset *= (params.max_cluster_element_count_div_32 + 32); + + //depth buffers generally can't be accessed via image API + float depth = texelFetch(sampler2D(depth_buffer, depth_buffer_sampler), ivec2(screen_pos), 0).r * 2.0 - 1.0; + + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + depth /= params.z_far; + + uint slice = uint(clamp(floor(depth * 32.0), 0.0, 31.0)); + uint slice_minmax = cluster_data.data[offset + params.max_cluster_element_count_div_32 + slice]; + uint item_min = slice_minmax & 0xFFFF; + uint item_max = slice_minmax >> 16; + + uint item_count = 0; + for (uint i = 0; i < params.max_cluster_element_count_div_32; i++) { + uint slice_bits = cluster_data.data[offset + i]; + while (slice_bits != 0) { + uint bit = findLSB(slice_bits); + uint item = i * 32 + bit; + if ((item >= item_min && item < item_max)) { + item_count++; + } + slice_bits &= ~(1 << bit); + } + } + + item_count = min(item_count, 32); + + vec3 color = usage_gradient[item_count]; + + color = mix(color * 1.2, color * 0.3, float(slice) / 31.0); + + imageStore(screen_buffer, ivec2(screen_pos), vec4(color, 1.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl new file mode 100644 index 0000000000..8723ea78e4 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -0,0 +1,168 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec3 vertex_attrib; + +layout(location = 0) out float depth_interp; +layout(location = 1) out flat uint element_index; + +layout(push_constant, binding = 0, std430) uniform Params { + uint base_index; + uint pad0; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + element_index = params.base_index + gl_InstanceIndex; + + vec3 vertex = vertex_attrib; + vertex *= render_elements.data[element_index].scale; + + vertex = vec4(vertex, 1.0) * render_elements.data[element_index].transform_inv; + depth_interp = -vertex.z; + + gl_Position = state.projection * vec4(vertex, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_vote) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable +#extension GL_KHR_shader_subgroup_vote : enable + +#define USE_SUBGROUPS +#endif + +layout(location = 0) in float depth_interp; +layout(location = 1) in flat uint element_index; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +//cluster data is layout linearly, each cell contains the follow information: +// - list of bits for every element to mark as used, so (max_elem_count/32)*4 uints +// - a uint for each element to mark the depth bits used when rendering (0-31) + +layout(set = 0, binding = 3, std430) buffer restrict ClusterRender { + uint data[]; +} +cluster_render; + +void main() { + //convert from screen to cluster + uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift; + + //get linear cluster offset from screen poss + uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y; + //multiply by data size to position at the beginning of the element list for this cluster + cluster_offset *= state.cluster_data_size; + + //find the current element in the list and plot the bit to mark it as used + uint usage_write_offset = cluster_offset + (element_index >> 5); + uint usage_write_bit = 1 << (element_index & 0x1F); + +#ifdef USE_SUBGROUPS + + uint cluster_thread_group_index; + + if (!gl_HelperInvocation) { + //http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf + + uvec4 mask; + + while (true) { + // find the cluster offset of the first active thread + // threads that did break; go inactive and no longer count + uint first = subgroupBroadcastFirst(cluster_offset); + // update the mask for thread that match this cluster + mask = subgroupBallot(first == cluster_offset); + if (first == cluster_offset) { + // This thread belongs to the group of threads that match this offset, + // so exit the loop. + break; + } + } + + cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask); + + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } +#endif + //find the current element in the depth usage list and mark the current depth as used + float unit_depth = depth_interp * state.inv_z_far; + + uint z_bit = clamp(uint(floor(unit_depth * 32.0)), 0, 31); + + uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index; + uint z_write_bit = 1 << z_bit; + +#ifdef USE_SUBGROUPS + if (!gl_HelperInvocation) { + z_write_bit = subgroupOr(z_write_bit); //merge all Zs + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl new file mode 100644 index 0000000000..5be0893c4f --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl @@ -0,0 +1,119 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 0, std430) uniform Params { + uint cluster_render_data_size; // how much data for a single cluster takes + uint max_render_element_count_div_32; //divided by 32 + uvec2 cluster_screen_size; + uint render_element_count_div_32; //divided by 32 + + uint max_cluster_element_count_div_32; //divided by 32 + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender { + uint data[]; +} +cluster_render; + +layout(set = 0, binding = 2, std430) buffer restrict ClusterStore { + uint data[]; +} +cluster_store; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(pos, params.cluster_screen_size))) { + return; + } + + //counter for each type of render_element + + //base offset for this cluster + uint base_offset = (pos.x + params.cluster_screen_size.x * pos.y); + uint src_offset = base_offset * params.cluster_render_data_size; + + uint render_element_offset = 0; + + //check all render_elements and see which one was written to + while (render_element_offset < params.render_element_count_div_32) { + uint bits = cluster_render.data[src_offset + render_element_offset]; + while (bits != 0) { + //if bits exist, check the render_element + uint index_bit = findLSB(bits); + uint index = render_element_offset * 32 + index_bit; + uint type = render_elements.data[index].type; + + uint z_range_offset = src_offset + params.max_render_element_count_div_32 + index; + uint z_range = cluster_render.data[z_range_offset]; + + //if object was written, z was written, but check just in case + if (z_range != 0) { //should always be > 0 + + uint from_z = findLSB(z_range); + uint to_z = findMSB(z_range) + 1; + + if (render_elements.data[index].touches_near) { + from_z = 0; + } + + if (render_elements.data[index].touches_far) { + to_z = 32; + } + + // find cluster offset in the buffer used for indexing in the renderer + uint dst_offset = (base_offset + type * (params.cluster_screen_size.x * params.cluster_screen_size.y)) * (params.max_cluster_element_count_div_32 + 32); + + uint orig_index = render_elements.data[index].original_index; + //store this index in the Z slices by setting the relevant bit + for (uint i = from_z; i < to_z; i++) { + uint slice_ofs = dst_offset + params.max_cluster_element_count_div_32 + i; + + uint minmax = cluster_store.data[slice_ofs]; + + if (minmax == 0) { + minmax = 0xFFFF; //min 0, max 0xFFFF + } + + uint elem_min = min(orig_index, minmax & 0xFFFF); + uint elem_max = max(orig_index + 1, minmax >> 16); //always store plus one, so zero means range is empty when not written to + + minmax = elem_min | (elem_max << 16); + cluster_store.data[slice_ofs] = minmax; + } + + uint store_word = orig_index >> 5; + uint store_bit = orig_index & 0x1F; + + //store the actual render_element index at the end, so the rendering code can reference it + cluster_store.data[dst_offset + store_word] |= 1 << store_bit; + } + + bits &= ~(1 << index_bit); //clear the bit to continue iterating + } + + render_element_offset++; + } +} diff --git a/servers/rendering/renderer_rd/shaders/copy.glsl b/servers/rendering/renderer_rd/shaders/copy.glsl new file mode 100644 index 0000000000..cdd35dfb3f --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/copy.glsl @@ -0,0 +1,279 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define FLAG_HORIZONTAL (1 << 0) +#define FLAG_USE_BLUR_SECTION (1 << 1) +#define FLAG_USE_ORTHOGONAL_PROJECTION (1 << 2) +#define FLAG_DOF_NEAR_FIRST_TAP (1 << 3) +#define FLAG_GLOW_FIRST_PASS (1 << 4) +#define FLAG_FLIP_Y (1 << 5) +#define FLAG_FORCE_LUMINANCE (1 << 6) +#define FLAG_COPY_ALL_SOURCE (1 << 7) +#define FLAG_HIGH_QUALITY_GLOW (1 << 8) +#define FLAG_ALPHA_TO_ONE (1 << 9) + +layout(push_constant, binding = 1, std430) uniform Params { + ivec4 section; + ivec2 target; + uint flags; + uint pad; + // Glow. + float glow_strength; + float glow_bloom; + float glow_hdr_threshold; + float glow_hdr_scale; + + float glow_exposure; + float glow_white; + float glow_luminance_cap; + float glow_auto_exposure_grey; + // DOF. + float camera_z_far; + float camera_z_near; + uint pad2[2]; + + vec4 set_color; +} +params; + +#ifdef MODE_CUBEMAP_ARRAY_TO_PANORAMA +layout(set = 0, binding = 0) uniform samplerCubeArray source_color; +#elif defined(MODE_CUBEMAP_TO_PANORAMA) +layout(set = 0, binding = 0) uniform samplerCube source_color; +#elif !defined(MODE_SET_COLOR) +layout(set = 0, binding = 0) uniform sampler2D source_color; +#endif + +#ifdef GLOW_USE_AUTO_EXPOSURE +layout(set = 1, binding = 0) uniform sampler2D source_auto_exposure; +#endif + +#if defined(MODE_LINEARIZE_DEPTH_COPY) || defined(MODE_SIMPLE_COPY_DEPTH) +layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer; +#elif defined(DST_IMAGE_8BIT) +layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer; +#else +layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer; +#endif + +#ifdef MODE_GAUSSIAN_GLOW +shared vec4 local_cache[256]; +shared vec4 temp_cache[128]; +#endif + +void main() { + // Pixel being shaded + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + +#ifndef MODE_GAUSSIAN_GLOW // Glow needs the extra threads + if (any(greaterThanEqual(pos, params.section.zw))) { //too large, do nothing + return; + } +#endif + +#ifdef MODE_MIPMAP + + ivec2 base_pos = (pos + params.section.xy) << 1; + vec4 color = texelFetch(source_color, base_pos, 0); + color += texelFetch(source_color, base_pos + ivec2(0, 1), 0); + color += texelFetch(source_color, base_pos + ivec2(1, 0), 0); + color += texelFetch(source_color, base_pos + ivec2(1, 1), 0); + color /= 4.0; + + imageStore(dest_buffer, pos + params.target, color); +#endif + +#ifdef MODE_GAUSSIAN_BLUR + + //Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect + + if (bool(params.flags & FLAG_HORIZONTAL)) { + ivec2 base_pos = (pos + params.section.xy) << 1; + vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.214607; + color += texelFetch(source_color, base_pos + ivec2(1, 0), 0) * 0.189879; + color += texelFetch(source_color, base_pos + ivec2(2, 0), 0) * 0.131514; + color += texelFetch(source_color, base_pos + ivec2(3, 0), 0) * 0.071303; + color += texelFetch(source_color, base_pos + ivec2(-1, 0), 0) * 0.189879; + color += texelFetch(source_color, base_pos + ivec2(-2, 0), 0) * 0.131514; + color += texelFetch(source_color, base_pos + ivec2(-3, 0), 0) * 0.071303; + imageStore(dest_buffer, pos + params.target, color); + } else { + ivec2 base_pos = (pos + params.section.xy); + vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.38774; + color += texelFetch(source_color, base_pos + ivec2(0, 1), 0) * 0.24477; + color += texelFetch(source_color, base_pos + ivec2(0, 2), 0) * 0.06136; + color += texelFetch(source_color, base_pos + ivec2(0, -1), 0) * 0.24477; + color += texelFetch(source_color, base_pos + ivec2(0, -2), 0) * 0.06136; + imageStore(dest_buffer, pos + params.target, color); + } +#endif + +#ifdef MODE_GAUSSIAN_GLOW + + // First pass copy texture into 16x16 local memory for every 8x8 thread block + vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); + uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16; + + if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) { + vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); + + local_cache[dest_index] = (textureLod(source_color, quad_center_uv, 0) + textureLod(source_color, quad_offset_uv, 0)) * 0.5; + local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5; + local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5; + local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5; + } else { + local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0); + local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0); + local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0); + local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0); + } + + memoryBarrierShared(); + barrier(); + + // Horizontal pass. Needs to copy into 8x16 chunk of local memory so vertical pass has full resolution + uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4; + vec4 color_top = vec4(0.0); + color_top += local_cache[read_index] * 0.174938; + color_top += local_cache[read_index + 1] * 0.165569; + color_top += local_cache[read_index + 2] * 0.140367; + color_top += local_cache[read_index + 3] * 0.106595; + color_top += local_cache[read_index - 1] * 0.165569; + color_top += local_cache[read_index - 2] * 0.140367; + color_top += local_cache[read_index - 3] * 0.106595; + + vec4 color_bottom = vec4(0.0); + color_bottom += local_cache[read_index + 16] * 0.174938; + color_bottom += local_cache[read_index + 1 + 16] * 0.165569; + color_bottom += local_cache[read_index + 2 + 16] * 0.140367; + color_bottom += local_cache[read_index + 3 + 16] * 0.106595; + color_bottom += local_cache[read_index - 1 + 16] * 0.165569; + color_bottom += local_cache[read_index - 2 + 16] * 0.140367; + color_bottom += local_cache[read_index - 3 + 16] * 0.106595; + + // rotate samples to take advantage of cache coherency + uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16; + + temp_cache[write_index] = color_top; + temp_cache[write_index + 1] = color_bottom; + + memoryBarrierShared(); + barrier(); + + // Vertical pass + uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4; + vec4 color = vec4(0.0); + + color += temp_cache[index] * 0.174938; + color += temp_cache[index + 1] * 0.165569; + color += temp_cache[index + 2] * 0.140367; + color += temp_cache[index + 3] * 0.106595; + color += temp_cache[index - 1] * 0.165569; + color += temp_cache[index - 2] * 0.140367; + color += temp_cache[index - 3] * 0.106595; + + color *= params.glow_strength; + + if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) { +#ifdef GLOW_USE_AUTO_EXPOSURE + + color /= texelFetch(source_auto_exposure, ivec2(0, 0), 0).r / params.glow_auto_exposure_grey; +#endif + color *= params.glow_exposure; + + float luminance = max(color.r, max(color.g, color.b)); + float feedback = max(smoothstep(params.glow_hdr_threshold, params.glow_hdr_threshold + params.glow_hdr_scale, luminance), params.glow_bloom); + + color = min(color * feedback, vec4(params.glow_luminance_cap)); + } + + imageStore(dest_buffer, pos + params.target, color); + +#endif + +#ifdef MODE_SIMPLE_COPY + + vec4 color; + if (bool(params.flags & FLAG_COPY_ALL_SOURCE)) { + vec2 uv = vec2(pos) / vec2(params.section.zw); + if (bool(params.flags & FLAG_FLIP_Y)) { + uv.y = 1.0 - uv.y; + } + color = textureLod(source_color, uv, 0.0); + + } else { + color = texelFetch(source_color, pos + params.section.xy, 0); + + if (bool(params.flags & FLAG_FLIP_Y)) { + pos.y = params.section.w - pos.y - 1; + } + } + + if (bool(params.flags & FLAG_FORCE_LUMINANCE)) { + color.rgb = vec3(max(max(color.r, color.g), color.b)); + } + + if (bool(params.flags & FLAG_ALPHA_TO_ONE)) { + color.a = 1.0; + } + + imageStore(dest_buffer, pos + params.target, color); + +#endif + +#ifdef MODE_SIMPLE_COPY_DEPTH + + vec4 color = texelFetch(source_color, pos + params.section.xy, 0); + + if (bool(params.flags & FLAG_FLIP_Y)) { + pos.y = params.section.w - pos.y - 1; + } + + imageStore(dest_buffer, pos + params.target, vec4(color.r)); + +#endif + +#ifdef MODE_LINEARIZE_DEPTH_COPY + + float depth = texelFetch(source_color, pos + params.section.xy, 0).r; + depth = depth * 2.0 - 1.0; + depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + vec4 color = vec4(depth / params.camera_z_far); + + if (bool(params.flags & FLAG_FLIP_Y)) { + pos.y = params.section.w - pos.y - 1; + } + + imageStore(dest_buffer, pos + params.target, color); +#endif + +#if defined(MODE_CUBEMAP_TO_PANORAMA) || defined(MODE_CUBEMAP_ARRAY_TO_PANORAMA) + + const float PI = 3.14159265359; + vec2 uv = vec2(pos) / vec2(params.section.zw); + uv.y = 1.0 - uv.y; + float phi = uv.x * 2.0 * PI; + float theta = uv.y * PI; + + vec3 normal; + normal.x = sin(phi) * sin(theta) * -1.0; + normal.y = cos(theta); + normal.z = cos(phi) * sin(theta) * -1.0; + +#ifdef MODE_CUBEMAP_TO_PANORAMA + vec4 color = textureLod(source_color, normal, params.camera_z_far); //the biggest the lod the least the acne +#else + vec4 color = textureLod(source_color, vec4(normal, params.camera_z_far), 0.0); //the biggest the lod the least the acne +#endif + imageStore(dest_buffer, pos + params.target, color); +#endif + +#ifdef MODE_SET_COLOR + imageStore(dest_buffer, pos + params.target, params.set_color); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl b/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl new file mode 100644 index 0000000000..9751e13b4e --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl @@ -0,0 +1,115 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) out vec2 uv_interp; + +layout(push_constant, binding = 1, std430) uniform Params { + vec4 section; + vec2 pixel_size; + bool flip_y; + bool use_section; + + bool force_luminance; + uint pad[3]; +} +params; + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + uv_interp = base_arr[gl_VertexIndex]; + + vec2 vpos = uv_interp; + if (params.use_section) { + vpos = params.section.xy + vpos * params.section.zw; + } + + gl_Position = vec4(vpos * 2.0 - 1.0, 0.0, 1.0); + + if (params.flip_y) { + uv_interp.y = 1.0 - uv_interp.y; + } +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(push_constant, binding = 1, std430) uniform Params { + vec4 section; + vec2 pixel_size; + bool flip_y; + bool use_section; + + bool force_luminance; + bool alpha_to_zero; + bool srgb; + uint pad; +} +params; + +layout(location = 0) in vec2 uv_interp; + +layout(set = 0, binding = 0) uniform sampler2D source_color; +#ifdef MODE_TWO_SOURCES +layout(set = 1, binding = 0) uniform sampler2D source_color2; +#endif +layout(location = 0) out vec4 frag_color; + +vec3 linear_to_srgb(vec3 color) { + //if going to srgb, clamp from 0 to 1. + color = clamp(color, vec3(0.0), vec3(1.0)); + const vec3 a = vec3(0.055f); + return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f))); +} + +void main() { + vec2 uv = uv_interp; + +#ifdef MODE_PANORAMA_TO_DP + + //obtain normal from dual paraboloid uv +#define M_PI 3.14159265359 + + float side; + uv.y = modf(uv.y * 2.0, side); + side = side * 2.0 - 1.0; + vec3 normal = vec3(uv * 2.0 - 1.0, 0.0); + normal.z = 0.5 - 0.5 * ((normal.x * normal.x) + (normal.y * normal.y)); + normal *= -side; + normal = normalize(normal); + + //now convert normal to panorama uv + + vec2 st = vec2(atan(normal.x, normal.z), acos(normal.y)); + + if (st.x < 0.0) { + st.x += M_PI * 2.0; + } + + uv = st / vec2(M_PI * 2.0, M_PI); + + if (side < 0.0) { + //uv.y = 1.0 - uv.y; + uv = 1.0 - uv; + } +#endif + vec4 color = textureLod(source_color, uv, 0.0); +#ifdef MODE_TWO_SOURCES + color += textureLod(source_color2, uv, 0.0); +#endif + if (params.force_luminance) { + color.rgb = vec3(max(max(color.r, color.g), color.b)); + } + if (params.alpha_to_zero) { + color.rgb *= color.a; + } + if (params.srgb) { + color.rgb = linear_to_srgb(color.rgb); + } + frag_color = color; +} diff --git a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl new file mode 100644 index 0000000000..c3ac0bee57 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl @@ -0,0 +1,84 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(push_constant, binding = 1, std430) uniform Params { + float z_far; + float z_near; + bool z_flip; + uint pad; + vec4 screen_rect; +} +params; + +layout(location = 0) out vec2 uv_interp; + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + uv_interp = base_arr[gl_VertexIndex]; + vec2 screen_pos = uv_interp * params.screen_rect.zw + params.screen_rect.xy; + gl_Position = vec4(screen_pos * 2.0 - 1.0, 0.0, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec2 uv_interp; + +layout(set = 0, binding = 0) uniform samplerCube source_cube; + +layout(push_constant, binding = 1, std430) uniform Params { + float z_far; + float z_near; + bool z_flip; + uint pad; + vec4 screen_rect; +} +params; + +void main() { + vec2 uv = uv_interp; + + vec3 normal = vec3(uv * 2.0 - 1.0, 0.0); + + normal.z = 0.5 - 0.5 * ((normal.x * normal.x) + (normal.y * normal.y)); + normal = normalize(normal); + + normal.y = -normal.y; //needs to be flipped to match projection matrix + if (!params.z_flip) { + normal.z = -normal.z; + } + + float depth = texture(source_cube, normal).r; + + // absolute values for direction cosines, bigger value equals closer to basis axis + vec3 unorm = abs(normal); + + if ((unorm.x >= unorm.y) && (unorm.x >= unorm.z)) { + // x code + unorm = normal.x > 0.0 ? vec3(1.0, 0.0, 0.0) : vec3(-1.0, 0.0, 0.0); + } else if ((unorm.y > unorm.x) && (unorm.y >= unorm.z)) { + // y code + unorm = normal.y > 0.0 ? vec3(0.0, 1.0, 0.0) : vec3(0.0, -1.0, 0.0); + } else if ((unorm.z > unorm.x) && (unorm.z > unorm.y)) { + // z code + unorm = normal.z > 0.0 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 0.0, -1.0); + } else { + // oh-no we messed up code + // has to be + unorm = vec3(1.0, 0.0, 0.0); + } + + float depth_fix = 1.0 / dot(normal, unorm); + + depth = 2.0 * depth - 1.0; + float linear_depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + depth = (linear_depth * depth_fix) / params.z_far; + + gl_FragDepth = depth; +} diff --git a/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl b/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl new file mode 100644 index 0000000000..7f269b7af3 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl @@ -0,0 +1,191 @@ +// Copyright 2016 Activision Publishing, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#[compute] + +#version 450 + +VERSION_DEFINES + +#define BLOCK_SIZE 8 + +layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform samplerCube source_cubemap; + +layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly imageCube dest_cubemap; + +layout(push_constant, binding = 1, std430) uniform Params { + uint face_size; +} +params; + +#define M_PI 3.14159265359 + +void get_dir_0(out vec3 dir, in float u, in float v) { + dir[0] = 1.0; + dir[1] = v; + dir[2] = -u; +} + +void get_dir_1(out vec3 dir, in float u, in float v) { + dir[0] = -1.0; + dir[1] = v; + dir[2] = u; +} + +void get_dir_2(out vec3 dir, in float u, in float v) { + dir[0] = u; + dir[1] = 1.0; + dir[2] = -v; +} + +void get_dir_3(out vec3 dir, in float u, in float v) { + dir[0] = u; + dir[1] = -1.0; + dir[2] = v; +} + +void get_dir_4(out vec3 dir, in float u, in float v) { + dir[0] = u; + dir[1] = v; + dir[2] = 1.0; +} + +void get_dir_5(out vec3 dir, in float u, in float v) { + dir[0] = -u; + dir[1] = v; + dir[2] = -1.0; +} + +float calcWeight(float u, float v) { + float val = u * u + v * v + 1.0; + return val * sqrt(val); +} + +void main() { + uvec3 id = gl_GlobalInvocationID; + uint face_size = params.face_size; + + if (id.x < face_size && id.y < face_size) { + float inv_face_size = 1.0 / float(face_size); + + float u0 = (float(id.x) * 2.0 + 1.0 - 0.75) * inv_face_size - 1.0; + float u1 = (float(id.x) * 2.0 + 1.0 + 0.75) * inv_face_size - 1.0; + + float v0 = (float(id.y) * 2.0 + 1.0 - 0.75) * -inv_face_size + 1.0; + float v1 = (float(id.y) * 2.0 + 1.0 + 0.75) * -inv_face_size + 1.0; + + float weights[4]; + weights[0] = calcWeight(u0, v0); + weights[1] = calcWeight(u1, v0); + weights[2] = calcWeight(u0, v1); + weights[3] = calcWeight(u1, v1); + + const float wsum = 0.5 / (weights[0] + weights[1] + weights[2] + weights[3]); + for (int i = 0; i < 4; i++) { + weights[i] = weights[i] * wsum + .125; + } + + vec3 dir; + vec4 color; + switch (id.z) { + case 0: + get_dir_0(dir, u0, v0); + color = textureLod(source_cubemap, normalize(dir), 0.0) * weights[0]; + + get_dir_0(dir, u1, v0); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[1]; + + get_dir_0(dir, u0, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[2]; + + get_dir_0(dir, u1, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[3]; + break; + case 1: + get_dir_1(dir, u0, v0); + color = textureLod(source_cubemap, normalize(dir), 0.0) * weights[0]; + + get_dir_1(dir, u1, v0); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[1]; + + get_dir_1(dir, u0, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[2]; + + get_dir_1(dir, u1, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[3]; + break; + case 2: + get_dir_2(dir, u0, v0); + color = textureLod(source_cubemap, normalize(dir), 0.0) * weights[0]; + + get_dir_2(dir, u1, v0); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[1]; + + get_dir_2(dir, u0, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[2]; + + get_dir_2(dir, u1, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[3]; + break; + case 3: + get_dir_3(dir, u0, v0); + color = textureLod(source_cubemap, normalize(dir), 0.0) * weights[0]; + + get_dir_3(dir, u1, v0); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[1]; + + get_dir_3(dir, u0, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[2]; + + get_dir_3(dir, u1, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[3]; + break; + case 4: + get_dir_4(dir, u0, v0); + color = textureLod(source_cubemap, normalize(dir), 0.0) * weights[0]; + + get_dir_4(dir, u1, v0); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[1]; + + get_dir_4(dir, u0, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[2]; + + get_dir_4(dir, u1, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[3]; + break; + default: + get_dir_5(dir, u0, v0); + color = textureLod(source_cubemap, normalize(dir), 0.0) * weights[0]; + + get_dir_5(dir, u1, v0); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[1]; + + get_dir_5(dir, u0, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[2]; + + get_dir_5(dir, u1, v1); + color += textureLod(source_cubemap, normalize(dir), 0.0) * weights[3]; + break; + } + imageStore(dest_cubemap, ivec3(id), color); + } +} diff --git a/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl b/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl new file mode 100644 index 0000000000..987545fb76 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl @@ -0,0 +1,326 @@ +// Copyright 2016 Activision Publishing, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#[compute] + +#version 450 + +VERSION_DEFINES + +#define GROUP_SIZE 64 + +layout(local_size_x = GROUP_SIZE, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform samplerCube source_cubemap; +layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly imageCube dest_cubemap0; +layout(rgba16f, set = 2, binding = 1) uniform restrict writeonly imageCube dest_cubemap1; +layout(rgba16f, set = 2, binding = 2) uniform restrict writeonly imageCube dest_cubemap2; +layout(rgba16f, set = 2, binding = 3) uniform restrict writeonly imageCube dest_cubemap3; +layout(rgba16f, set = 2, binding = 4) uniform restrict writeonly imageCube dest_cubemap4; +layout(rgba16f, set = 2, binding = 5) uniform restrict writeonly imageCube dest_cubemap5; +layout(rgba16f, set = 2, binding = 6) uniform restrict writeonly imageCube dest_cubemap6; + +#ifdef USE_HIGH_QUALITY +#define NUM_TAPS 32 +#else +#define NUM_TAPS 8 +#endif + +#define BASE_RESOLUTION 128 + +#ifdef USE_HIGH_QUALITY +layout(set = 1, binding = 0, std430) buffer restrict readonly Data { + vec4[7][5][3][24] coeffs; +} +data; +#else +layout(set = 1, binding = 0, std430) buffer restrict readonly Data { + vec4[7][5][6] coeffs; +} +data; +#endif + +void get_dir(out vec3 dir, in vec2 uv, in uint face) { + switch (face) { + case 0: + dir = vec3(1.0, uv[1], -uv[0]); + break; + case 1: + dir = vec3(-1.0, uv[1], uv[0]); + break; + case 2: + dir = vec3(uv[0], 1.0, -uv[1]); + break; + case 3: + dir = vec3(uv[0], -1.0, uv[1]); + break; + case 4: + dir = vec3(uv[0], uv[1], 1.0); + break; + default: + dir = vec3(-uv[0], uv[1], -1.0); + break; + } +} + +void main() { + // INPUT: + // id.x = the linear address of the texel (ignoring face) + // id.y = the face + // -> use to index output texture + // id.x = texel x + // id.y = texel y + // id.z = face + uvec3 id = gl_GlobalInvocationID; + + // determine which texel this is +#ifndef USE_TEXTURE_ARRAY + // NOTE (macOS/MoltenVK): Do not rename, "level" variable name conflicts with the Metal "level(float lod)" mipmap sampling function name. + int mip_level = 0; + if (id.x < (128 * 128)) { + mip_level = 0; + } else if (id.x < (128 * 128 + 64 * 64)) { + mip_level = 1; + id.x -= (128 * 128); + } else if (id.x < (128 * 128 + 64 * 64 + 32 * 32)) { + mip_level = 2; + id.x -= (128 * 128 + 64 * 64); + } else if (id.x < (128 * 128 + 64 * 64 + 32 * 32 + 16 * 16)) { + mip_level = 3; + id.x -= (128 * 128 + 64 * 64 + 32 * 32); + } else if (id.x < (128 * 128 + 64 * 64 + 32 * 32 + 16 * 16 + 8 * 8)) { + mip_level = 4; + id.x -= (128 * 128 + 64 * 64 + 32 * 32 + 16 * 16); + } else if (id.x < (128 * 128 + 64 * 64 + 32 * 32 + 16 * 16 + 8 * 8 + 4 * 4)) { + mip_level = 5; + id.x -= (128 * 128 + 64 * 64 + 32 * 32 + 16 * 16 + 8 * 8); + } else if (id.x < (128 * 128 + 64 * 64 + 32 * 32 + 16 * 16 + 8 * 8 + 4 * 4 + 2 * 2)) { + mip_level = 6; + id.x -= (128 * 128 + 64 * 64 + 32 * 32 + 16 * 16 + 8 * 8 + 4 * 4); + } else { + return; + } + int res = BASE_RESOLUTION >> mip_level; +#else // Using Texture Arrays so all levels are the same resolution + int res = BASE_RESOLUTION; + int mip_level = int(id.x / (BASE_RESOLUTION * BASE_RESOLUTION)); + id.x -= mip_level * BASE_RESOLUTION * BASE_RESOLUTION; +#endif + + // determine dir / pos for the texel + vec3 dir, adir, frameZ; + { + id.z = id.y; + id.y = id.x / res; + id.x -= id.y * res; + + vec2 uv; + uv.x = (float(id.x) * 2.0 + 1.0) / float(res) - 1.0; + uv.y = -(float(id.y) * 2.0 + 1.0) / float(res) + 1.0; + + get_dir(dir, uv, id.z); + frameZ = normalize(dir); + + adir = abs(dir); + } + + // GGX gather colors + vec4 color = vec4(0.0); + for (int axis = 0; axis < 3; axis++) { + const int otherAxis0 = 1 - (axis & 1) - (axis >> 1); + const int otherAxis1 = 2 - (axis >> 1); + + float frameweight = (max(adir[otherAxis0], adir[otherAxis1]) - .75) / .25; + if (frameweight > 0.0) { + // determine frame + vec3 UpVector; + switch (axis) { + case 0: + UpVector = vec3(1, 0, 0); + break; + case 1: + UpVector = vec3(0, 1, 0); + break; + default: + UpVector = vec3(0, 0, 1); + break; + } + + vec3 frameX = normalize(cross(UpVector, frameZ)); + vec3 frameY = cross(frameZ, frameX); + + // calculate parametrization for polynomial + float Nx = dir[otherAxis0]; + float Ny = dir[otherAxis1]; + float Nz = adir[axis]; + + float NmaxXY = max(abs(Ny), abs(Nx)); + Nx /= NmaxXY; + Ny /= NmaxXY; + + float theta; + if (Ny < Nx) { + if (Ny <= -0.999) + theta = Nx; + else + theta = Ny; + } else { + if (Ny >= 0.999) + theta = -Nx; + else + theta = -Ny; + } + + float phi; + if (Nz <= -0.999) + phi = -NmaxXY; + else if (Nz >= 0.999) + phi = NmaxXY; + else + phi = Nz; + + float theta2 = theta * theta; + float phi2 = phi * phi; + + // sample + for (int iSuperTap = 0; iSuperTap < NUM_TAPS / 4; iSuperTap++) { + const int index = (NUM_TAPS / 4) * axis + iSuperTap; + +#ifdef USE_HIGH_QUALITY + vec4 coeffsDir0[3]; + vec4 coeffsDir1[3]; + vec4 coeffsDir2[3]; + vec4 coeffsLevel[3]; + vec4 coeffsWeight[3]; + + for (int iCoeff = 0; iCoeff < 3; iCoeff++) { + coeffsDir0[iCoeff] = data.coeffs[mip_level][0][iCoeff][index]; + coeffsDir1[iCoeff] = data.coeffs[mip_level][1][iCoeff][index]; + coeffsDir2[iCoeff] = data.coeffs[mip_level][2][iCoeff][index]; + coeffsLevel[iCoeff] = data.coeffs[mip_level][3][iCoeff][index]; + coeffsWeight[iCoeff] = data.coeffs[mip_level][4][iCoeff][index]; + } + + for (int iSubTap = 0; iSubTap < 4; iSubTap++) { + // determine sample attributes (dir, weight, mip_level) + vec3 sample_dir = frameX * (coeffsDir0[0][iSubTap] + coeffsDir0[1][iSubTap] * theta2 + coeffsDir0[2][iSubTap] * phi2) + frameY * (coeffsDir1[0][iSubTap] + coeffsDir1[1][iSubTap] * theta2 + coeffsDir1[2][iSubTap] * phi2) + frameZ * (coeffsDir2[0][iSubTap] + coeffsDir2[1][iSubTap] * theta2 + coeffsDir2[2][iSubTap] * phi2); + + float sample_level = coeffsLevel[0][iSubTap] + coeffsLevel[1][iSubTap] * theta2 + coeffsLevel[2][iSubTap] * phi2; + + float sample_weight = coeffsWeight[0][iSubTap] + coeffsWeight[1][iSubTap] * theta2 + coeffsWeight[2][iSubTap] * phi2; +#else + vec4 coeffsDir0 = data.coeffs[mip_level][0][index]; + vec4 coeffsDir1 = data.coeffs[mip_level][1][index]; + vec4 coeffsDir2 = data.coeffs[mip_level][2][index]; + vec4 coeffsLevel = data.coeffs[mip_level][3][index]; + vec4 coeffsWeight = data.coeffs[mip_level][4][index]; + + for (int iSubTap = 0; iSubTap < 4; iSubTap++) { + // determine sample attributes (dir, weight, mip_level) + vec3 sample_dir = frameX * coeffsDir0[iSubTap] + frameY * coeffsDir1[iSubTap] + frameZ * coeffsDir2[iSubTap]; + + float sample_level = coeffsLevel[iSubTap]; + + float sample_weight = coeffsWeight[iSubTap]; +#endif + + sample_weight *= frameweight; + + // adjust for jacobian + sample_dir /= max(abs(sample_dir[0]), max(abs(sample_dir[1]), abs(sample_dir[2]))); + sample_level += 0.75 * log2(dot(sample_dir, sample_dir)); +#ifndef USE_TEXTURE_ARRAY + sample_level += float(mip_level) / 6.0; // Hack to increase the perceived roughness and reduce upscaling artifacts +#endif + // sample cubemap + color.xyz += textureLod(source_cubemap, normalize(sample_dir), sample_level).xyz * sample_weight; + color.w += sample_weight; + } + } + } + } + color /= color.w; + + // write color + color.xyz = max(vec3(0.0), color.xyz); + color.w = 1.0; +#ifdef USE_TEXTURE_ARRAY + id.xy *= uvec2(2, 2); +#endif + + switch (mip_level) { + case 0: + imageStore(dest_cubemap0, ivec3(id), color); +#ifdef USE_TEXTURE_ARRAY + imageStore(dest_cubemap0, ivec3(id) + ivec3(1.0, 0.0, 0.0), color); + imageStore(dest_cubemap0, ivec3(id) + ivec3(0.0, 1.0, 0.0), color); + imageStore(dest_cubemap0, ivec3(id) + ivec3(1.0, 1.0, 0.0), color); +#endif + break; + case 1: + imageStore(dest_cubemap1, ivec3(id), color); +#ifdef USE_TEXTURE_ARRAY + imageStore(dest_cubemap1, ivec3(id) + ivec3(1.0, 0.0, 0.0), color); + imageStore(dest_cubemap1, ivec3(id) + ivec3(0.0, 1.0, 0.0), color); + imageStore(dest_cubemap1, ivec3(id) + ivec3(1.0, 1.0, 0.0), color); +#endif + break; + case 2: + imageStore(dest_cubemap2, ivec3(id), color); +#ifdef USE_TEXTURE_ARRAY + imageStore(dest_cubemap2, ivec3(id) + ivec3(1.0, 0.0, 0.0), color); + imageStore(dest_cubemap2, ivec3(id) + ivec3(0.0, 1.0, 0.0), color); + imageStore(dest_cubemap2, ivec3(id) + ivec3(1.0, 1.0, 0.0), color); +#endif + break; + case 3: + imageStore(dest_cubemap3, ivec3(id), color); +#ifdef USE_TEXTURE_ARRAY + imageStore(dest_cubemap3, ivec3(id) + ivec3(1.0, 0.0, 0.0), color); + imageStore(dest_cubemap3, ivec3(id) + ivec3(0.0, 1.0, 0.0), color); + imageStore(dest_cubemap3, ivec3(id) + ivec3(1.0, 1.0, 0.0), color); +#endif + break; + case 4: + imageStore(dest_cubemap4, ivec3(id), color); +#ifdef USE_TEXTURE_ARRAY + imageStore(dest_cubemap4, ivec3(id) + ivec3(1.0, 0.0, 0.0), color); + imageStore(dest_cubemap4, ivec3(id) + ivec3(0.0, 1.0, 0.0), color); + imageStore(dest_cubemap4, ivec3(id) + ivec3(1.0, 1.0, 0.0), color); +#endif + break; + case 5: + imageStore(dest_cubemap5, ivec3(id), color); +#ifdef USE_TEXTURE_ARRAY + imageStore(dest_cubemap5, ivec3(id) + ivec3(1.0, 0.0, 0.0), color); + imageStore(dest_cubemap5, ivec3(id) + ivec3(0.0, 1.0, 0.0), color); + imageStore(dest_cubemap5, ivec3(id) + ivec3(1.0, 1.0, 0.0), color); +#endif + break; + default: + imageStore(dest_cubemap6, ivec3(id), color); +#ifdef USE_TEXTURE_ARRAY + imageStore(dest_cubemap6, ivec3(id) + ivec3(1.0, 0.0, 0.0), color); + imageStore(dest_cubemap6, ivec3(id) + ivec3(0.0, 1.0, 0.0), color); + imageStore(dest_cubemap6, ivec3(id) + ivec3(1.0, 1.0, 0.0), color); +#endif + break; + } +} diff --git a/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl b/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl new file mode 100644 index 0000000000..5cbb00baa4 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl @@ -0,0 +1,142 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#define GROUP_SIZE 8 + +layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform samplerCube source_cube; + +layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly imageCube dest_cubemap; + +layout(push_constant, binding = 1, std430) uniform Params { + uint face_id; + uint sample_count; + float roughness; + bool use_direct_write; + float face_size; +} +params; + +#define M_PI 3.14159265359 + +vec3 texelCoordToVec(vec2 uv, uint faceID) { + mat3 faceUvVectors[6]; + + // -x + faceUvVectors[1][0] = vec3(0.0, 0.0, 1.0); // u -> +z + faceUvVectors[1][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[1][2] = vec3(-1.0, 0.0, 0.0); // -x face + + // +x + faceUvVectors[0][0] = vec3(0.0, 0.0, -1.0); // u -> -z + faceUvVectors[0][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[0][2] = vec3(1.0, 0.0, 0.0); // +x face + + // -y + faceUvVectors[3][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[3][1] = vec3(0.0, 0.0, -1.0); // v -> -z + faceUvVectors[3][2] = vec3(0.0, -1.0, 0.0); // -y face + + // +y + faceUvVectors[2][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[2][1] = vec3(0.0, 0.0, 1.0); // v -> +z + faceUvVectors[2][2] = vec3(0.0, 1.0, 0.0); // +y face + + // -z + faceUvVectors[5][0] = vec3(-1.0, 0.0, 0.0); // u -> -x + faceUvVectors[5][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[5][2] = vec3(0.0, 0.0, -1.0); // -z face + + // +z + faceUvVectors[4][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[4][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[4][2] = vec3(0.0, 0.0, 1.0); // +z face + + // out = u * s_faceUv[0] + v * s_faceUv[1] + s_faceUv[2]. + vec3 result = (faceUvVectors[faceID][0] * uv.x) + (faceUvVectors[faceID][1] * uv.y) + faceUvVectors[faceID][2]; + return normalize(result); +} + +vec3 ImportanceSampleGGX(vec2 Xi, float Roughness, vec3 N) { + float a = Roughness * Roughness; // DISNEY'S ROUGHNESS [see Burley'12 siggraph] + + // Compute distribution direction + float Phi = 2.0 * M_PI * Xi.x; + float CosTheta = sqrt((1.0 - Xi.y) / (1.0 + (a * a - 1.0) * Xi.y)); + float SinTheta = sqrt(1.0 - CosTheta * CosTheta); + + // Convert to spherical direction + vec3 H; + H.x = SinTheta * cos(Phi); + H.y = SinTheta * sin(Phi); + H.z = CosTheta; + + vec3 UpVector = abs(N.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + vec3 TangentX = normalize(cross(UpVector, N)); + vec3 TangentY = cross(N, TangentX); + + // Tangent to world space + return TangentX * H.x + TangentY * H.y + N * H.z; +} + +// http://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html +float GGX(float NdotV, float a) { + float k = a / 2.0; + return NdotV / (NdotV * (1.0 - k) + k); +} + +// http://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html +float G_Smith(float a, float nDotV, float nDotL) { + return GGX(nDotL, a * a) * GGX(nDotV, a * a); +} + +float radicalInverse_VdC(uint bits) { + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + return float(bits) * 2.3283064365386963e-10; // / 0x100000000 +} + +vec2 Hammersley(uint i, uint N) { + return vec2(float(i) / float(N), radicalInverse_VdC(i)); +} + +void main() { + uvec3 id = gl_GlobalInvocationID; + id.z += params.face_id; + + vec2 uv = ((vec2(id.xy) * 2.0 + 1.0) / (params.face_size) - 1.0); + vec3 N = texelCoordToVec(uv, id.z); + + //vec4 color = color_interp; + + if (params.use_direct_write) { + imageStore(dest_cubemap, ivec3(id), vec4(texture(source_cube, N).rgb, 1.0)); + } else { + vec4 sum = vec4(0.0, 0.0, 0.0, 0.0); + + for (uint sampleNum = 0u; sampleNum < params.sample_count; sampleNum++) { + vec2 xi = Hammersley(sampleNum, params.sample_count); + + vec3 H = ImportanceSampleGGX(xi, params.roughness, N); + vec3 V = N; + vec3 L = (2.0 * dot(V, H) * H - V); + + float ndotl = clamp(dot(N, L), 0.0, 1.0); + + if (ndotl > 0.0) { + sum.rgb += textureLod(source_cube, L, 0.0).rgb * ndotl; + sum.a += ndotl; + } + } + sum /= sum.a; + + imageStore(dest_cubemap, ivec3(id), vec4(sum.rgb, 1.0)); + } +} diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl new file mode 100644 index 0000000000..92a5682572 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/gi.glsl @@ -0,0 +1,677 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define M_PI 3.141592 + +#define SDFGI_MAX_CASCADES 8 + +//set 0 for SDFGI and render buffers + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 5) uniform texture3D occlusion_texture; + +layout(set = 0, binding = 6) uniform sampler linear_sampler; +layout(set = 0, binding = 7) uniform sampler linear_sampler_with_mipmaps; + +struct ProbeCascadeData { + vec3 position; + float to_probe; + ivec3 probe_world_offset; + float to_cell; // 1/bounds * grid_size +}; + +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image2D ambient_buffer; +layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D reflection_buffer; + +layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; + +layout(set = 0, binding = 12) uniform texture2D depth_buffer; +layout(set = 0, binding = 13) uniform texture2D normal_roughness_buffer; +layout(set = 0, binding = 14) uniform utexture2D giprobe_buffer; + +layout(set = 0, binding = 15, std140) uniform SDFGI { + vec3 grid_size; + uint max_cascades; + + bool use_occlusion; + int probe_axis_size; + float probe_to_uvw; + float normal_bias; + + vec3 lightprobe_tex_pixel_size; + float energy; + + vec3 lightprobe_uv_offset; + float y_mult; + + vec3 occlusion_clamp; + uint pad3; + + vec3 occlusion_renormalize; + uint pad4; + + vec3 cascade_probe_size; + uint pad5; + + ProbeCascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; + +#define MAX_GI_PROBES 8 + +struct GIProbeData { + mat4 xform; + vec3 bounds; + float dynamic_range; + + float bias; + float normal_bias; + bool blend_ambient; + uint texture_slot; + + float anisotropy_strength; + float ambient_occlusion; + float ambient_occlusion_size; + uint mipmaps; +}; + +layout(set = 0, binding = 16, std140) uniform GIProbes { + GIProbeData data[MAX_GI_PROBES]; +} +gi_probes; + +layout(set = 0, binding = 17) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; + +layout(push_constant, binding = 0, std430) uniform Params { + ivec2 screen_size; + float z_near; + float z_far; + + vec4 proj_info; + + vec3 ao_color; + uint max_giprobes; + + bool high_quality_vct; + bool orthogonal; + uint pad[2]; + + mat3x4 cam_rotation; +} +params; + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +vec4 blend_color(vec4 src, vec4 dst) { + vec4 res; + float sa = 1.0 - src.a; + res.a = dst.a * sa + src.a; + if (res.a == 0.0) { + res.rgb = vec3(0); + } else { + res.rgb = (dst.rgb * dst.a * sa + src.rgb * src.a) / res.a; + } + return res; +} + +vec3 reconstruct_position(ivec2 screen_pos) { + vec3 pos; + pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r; + + pos.z = pos.z * 2.0 - 1.0; + if (params.orthogonal) { + pos.z = ((pos.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + pos.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - pos.z * (params.z_far - params.z_near)); + } + pos.z = -pos.z; + + pos.xy = vec2(screen_pos) * params.proj_info.xy + params.proj_info.zw; + if (!params.orthogonal) { + pos.xy *= pos.z; + } + + return pos; +} + +void sdfgi_probe_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal, vec3 cam_specular_normal, float roughness, out vec3 diffuse_light, out vec3 specular_light) { + cascade_pos += cam_normal * sdfgi.normal_bias; + + vec3 base_pos = floor(cascade_pos); + //cascade_pos += mix(vec3(0.0),vec3(0.01),lessThan(abs(cascade_pos-base_pos),vec3(0.01))) * cam_normal; + ivec3 probe_base_pos = ivec3(base_pos); + + vec4 diffuse_accum = vec4(0.0); + vec3 specular_accum; + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(cascade)); + tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; + tex_pos.xy = tex_pos.xy * (SDFGI_OCT_SIZE + 2) + ivec2(1); + + vec3 diffuse_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + + vec3 specular_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_specular_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + + specular_accum = vec3(0.0); + + vec4 light_accum = vec4(0.0); + float weight_accum = 0.0; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = cascade_pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(cam_normal, probe_dir)); + + // Compute lightprobe occlusion + + if (sdfgi.use_occlusion) { + ivec3 occ_indexv = abs((sdfgi.cascades[cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; + occ_pos.z += float(cascade); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + + occ_pos *= sdfgi.occlusion_renormalize; + float occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= max(occlusion, 0.01); + } + + // Compute lightprobe texture position + + vec3 diffuse; + vec3 pos_uvw = diffuse_posf; + pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; + pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; + diffuse = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + diffuse_accum += vec4(diffuse * weight, weight); + + { + vec3 specular = vec3(0.0); + vec3 pos_uvw = specular_posf; + pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; + pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; + if (roughness < 0.99) { + specular = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw + vec3(0, 0, float(sdfgi.max_cascades)), 0.0).rgb; + } + if (roughness > 0.2) { + specular = mix(specular, textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb, (roughness - 0.2) * 1.25); + } + + specular_accum += specular * weight; + } + } + + if (diffuse_accum.a > 0.0) { + diffuse_accum.rgb /= diffuse_accum.a; + } + + diffuse_light = diffuse_accum.rgb; + + if (diffuse_accum.a > 0.0) { + specular_accum /= diffuse_accum.a; + } + + specular_light = specular_accum; +} + +void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, out vec4 ambient_light, out vec4 reflection_light) { + //make vertex orientation the world one, but still align to camera + vertex.y *= sdfgi.y_mult; + normal.y *= sdfgi.y_mult; + reflection.y *= sdfgi.y_mult; + + //renormalize + normal = normalize(normal); + reflection = normalize(reflection); + + vec3 cam_pos = vertex; + vec3 cam_normal = normal; + + vec4 light_accum = vec4(0.0); + float weight_accum = 0.0; + + vec4 light_blend_accum = vec4(0.0); + float weight_blend_accum = 0.0; + + float blend = -1.0; + + // helper constants, compute once + + uint cascade = 0xFFFFFFFF; + vec3 cascade_pos; + vec3 cascade_normal; + + for (uint i = 0; i < sdfgi.max_cascades; i++) { + cascade_pos = (cam_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + + if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { + continue; //skip cascade + } + + cascade = i; + break; + } + + if (cascade < SDFGI_MAX_CASCADES) { + ambient_light = vec4(0, 0, 0, 1); + reflection_light = vec4(0, 0, 0, 1); + + float blend; + vec3 diffuse, specular; + sdfgi_probe_process(cascade, cascade_pos, cam_pos, cam_normal, reflection, roughness, diffuse, specular); + + { + //process blend + float blend_from = (float(sdfgi.probe_axis_size - 1) / 2.0) - 2.5; + float blend_to = blend_from + 2.0; + + vec3 inner_pos = cam_pos * sdfgi.cascades[cascade].to_probe; + + float len = length(inner_pos); + + inner_pos = abs(normalize(inner_pos)); + len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z)); + + if (len >= blend_from) { + blend = smoothstep(blend_from, blend_to, len); + } else { + blend = 0.0; + } + } + + if (blend > 0.0) { + //blend + if (cascade == sdfgi.max_cascades - 1) { + ambient_light.a = 1.0 - blend; + reflection_light.a = 1.0 - blend; + + } else { + vec3 diffuse2, specular2; + cascade_pos = (cam_pos - sdfgi.cascades[cascade + 1].position) * sdfgi.cascades[cascade + 1].to_probe; + sdfgi_probe_process(cascade + 1, cascade_pos, cam_pos, cam_normal, reflection, roughness, diffuse2, specular2); + diffuse = mix(diffuse, diffuse2, blend); + specular = mix(specular, specular2, blend); + } + } + + ambient_light.rgb = diffuse; + + if (roughness < 0.2) { + vec3 pos_to_uvw = 1.0 / sdfgi.grid_size; + vec4 light_accum = vec4(0.0); + + float blend_size = (sdfgi.grid_size.x / float(sdfgi.probe_axis_size - 1)) * 0.5; + + float radius_sizes[SDFGI_MAX_CASCADES]; + cascade = 0xFFFF; + + float base_distance = length(cam_pos); + for (uint i = 0; i < sdfgi.max_cascades; i++) { + radius_sizes[i] = (1.0 / sdfgi.cascades[i].to_cell) * (sdfgi.grid_size.x * 0.5 - blend_size); + if (cascade == 0xFFFF && base_distance < radius_sizes[i]) { + cascade = i; + } + } + + cascade = min(cascade, sdfgi.max_cascades - 1); + + float max_distance = radius_sizes[sdfgi.max_cascades - 1]; + vec3 ray_pos = cam_pos; + vec3 ray_dir = reflection; + + { + float prev_radius = cascade > 0 ? radius_sizes[cascade - 1] : 0.0; + float base_blend = (base_distance - prev_radius) / (radius_sizes[cascade] - prev_radius); + float bias = (1.0 + base_blend) * 1.1; + vec3 abs_ray_dir = abs(ray_dir); + //ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion + ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; + } + float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade + uint i = 0; + bool found = false; + while (true) { + if (length(ray_pos) >= max_distance || light_accum.a > 0.99) { + break; + } + if (!found && i >= cascade && length(ray_pos) < radius_sizes[i]) { + uint next_i = min(i + 1, sdfgi.max_cascades - 1); + cascade = max(i, cascade); //never go down + + vec3 pos = ray_pos - sdfgi.cascades[i].position; + pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; + + float fdistance = textureLod(sampler3D(sdf_cascades[i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; + + vec4 hit_light = vec4(0.0); + if (fdistance < softness) { + hit_light.rgb = textureLod(sampler3D(light_cascades[i], linear_sampler), pos, 0.0).rgb; + hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light.a = clamp(1.0 - (fdistance / softness), 0.0, 1.0); + hit_light.rgb *= hit_light.a; + } + + fdistance /= sdfgi.cascades[i].to_cell; + + if (i < (sdfgi.max_cascades - 1)) { + pos = ray_pos - sdfgi.cascades[next_i].position; + pos *= sdfgi.cascades[next_i].to_cell * pos_to_uvw; + + float fdistance2 = textureLod(sampler3D(sdf_cascades[next_i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; + + vec4 hit_light2 = vec4(0.0); + if (fdistance2 < softness) { + hit_light2.rgb = textureLod(sampler3D(light_cascades[next_i], linear_sampler), pos, 0.0).rgb; + hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light2.a = clamp(1.0 - (fdistance2 / softness), 0.0, 1.0); + hit_light2.rgb *= hit_light2.a; + } + + float prev_radius = i == 0 ? 0.0 : radius_sizes[max(0, i - 1)]; + float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); + + fdistance2 /= sdfgi.cascades[next_i].to_cell; + + hit_light = mix(hit_light, hit_light2, blend); + fdistance = mix(fdistance, fdistance2, blend); + } + + light_accum += hit_light; + ray_pos += ray_dir * fdistance; + found = true; + } + i++; + if (i == sdfgi.max_cascades) { + i = 0; + found = false; + } + } + + vec3 light = light_accum.rgb / max(light_accum.a, 0.00001); + float alpha = min(1.0, light_accum.a); + + float b = min(1.0, roughness * 5.0); + + float sa = 1.0 - b; + + reflection_light.a = alpha * sa + b; + if (reflection_light.a == 0) { + specular = vec3(0.0); + } else { + specular = (light * alpha * sa + specular * b) / reflection_light.a; + } + } + + reflection_light.rgb = specular; + + ambient_light.rgb *= sdfgi.energy; + reflection_light.rgb *= sdfgi.energy; + } else { + ambient_light = vec4(0); + reflection_light = vec4(0); + } +} + +//standard voxel cone trace +vec4 voxel_cone_trace(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { + float dist = p_bias; + vec4 color = vec4(0.0); + + while (dist < max_distance && color.a < 0.95) { + float diameter = max(1.0, 2.0 * tan_half_angle * dist); + vec3 uvw_pos = (pos + dist * direction) * cell_size; + float half_diameter = diameter * 0.5; + //check if outside, then break + if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + half_diameter * cell_size)))) { + break; + } + vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, log2(diameter)); + float a = (1.0 - color.a); + color += a * scolor; + dist += half_diameter; + } + + return color; +} + +vec4 voxel_cone_trace_45_degrees(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float max_distance, float p_bias) { + float dist = p_bias; + vec4 color = vec4(0.0); + float radius = max(0.5, dist); + float lod_level = log2(radius * 2.0); + + while (dist < max_distance && color.a < 0.95) { + vec3 uvw_pos = (pos + dist * direction) * cell_size; + + //check if outside, then break + if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + radius * cell_size)))) { + break; + } + vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, lod_level); + lod_level += 1.0; + + float a = (1.0 - color.a); + scolor *= a; + color += scolor; + dist += radius; + radius = max(0.5, dist); + } + return color; +} + +void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3 normal_xform, float roughness, inout vec4 out_spec, inout vec4 out_diff, inout float out_blend) { + position = (gi_probes.data[index].xform * vec4(position, 1.0)).xyz; + ref_vec = normalize((gi_probes.data[index].xform * vec4(ref_vec, 0.0)).xyz); + normal = normalize((gi_probes.data[index].xform * vec4(normal, 0.0)).xyz); + + position += normal * gi_probes.data[index].normal_bias; + + //this causes corrupted pixels, i have no idea why.. + if (any(bvec2(any(lessThan(position, vec3(0.0))), any(greaterThan(position, gi_probes.data[index].bounds))))) { + return; + } + + mat3 dir_xform = mat3(gi_probes.data[index].xform) * normal_xform; + + vec3 blendv = abs(position / gi_probes.data[index].bounds * 2.0 - 1.0); + float blend = clamp(1.0 - max(blendv.x, max(blendv.y, blendv.z)), 0.0, 1.0); + //float blend=1.0; + + float max_distance = length(gi_probes.data[index].bounds); + vec3 cell_size = 1.0 / gi_probes.data[index].bounds; + + //irradiance + + vec4 light = vec4(0.0); + + if (params.high_quality_vct) { + const uint cone_dir_count = 6; + vec3 cone_dirs[cone_dir_count] = vec3[]( + vec3(0.0, 0.0, 1.0), + vec3(0.866025, 0.0, 0.5), + vec3(0.267617, 0.823639, 0.5), + vec3(-0.700629, 0.509037, 0.5), + vec3(-0.700629, -0.509037, 0.5), + vec3(0.267617, -0.823639, 0.5)); + + float cone_weights[cone_dir_count] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); + float cone_angle_tan = 0.577; + + for (uint i = 0; i < cone_dir_count; i++) { + vec3 dir = normalize(dir_xform * cone_dirs[i]); + light += cone_weights[i] * voxel_cone_trace(gi_probe_textures[index], cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); + } + } else { + const uint cone_dir_count = 4; + vec3 cone_dirs[cone_dir_count] = vec3[]( + vec3(0.707107, 0.0, 0.707107), + vec3(0.0, 0.707107, 0.707107), + vec3(-0.707107, 0.0, 0.707107), + vec3(0.0, -0.707107, 0.707107)); + + float cone_weights[cone_dir_count] = float[](0.25, 0.25, 0.25, 0.25); + for (int i = 0; i < cone_dir_count; i++) { + vec3 dir = normalize(dir_xform * cone_dirs[i]); + light += cone_weights[i] * voxel_cone_trace_45_degrees(gi_probe_textures[index], cell_size, position, dir, max_distance, gi_probes.data[index].bias); + } + } + + if (gi_probes.data[index].ambient_occlusion > 0.001) { + float size = 1.0 + gi_probes.data[index].ambient_occlusion_size * 7.0; + + float taps, blend; + blend = modf(size, taps); + float ao = 0.0; + for (float i = 1.0; i <= taps; i++) { + vec3 ofs = (position + normal * (i * 0.5 + 1.0)) * cell_size; + ao += textureLod(sampler3D(gi_probe_textures[index], linear_sampler_with_mipmaps), ofs, i - 1.0).a * i; + } + + if (blend > 0.001) { + vec3 ofs = (position + normal * ((taps + 1.0) * 0.5 + 1.0)) * cell_size; + ao += textureLod(sampler3D(gi_probe_textures[index], linear_sampler_with_mipmaps), ofs, taps).a * (taps + 1.0) * blend; + } + + ao = 1.0 - min(1.0, ao); + + light.rgb = mix(params.ao_color, light.rgb, mix(1.0, ao, gi_probes.data[index].ambient_occlusion)); + } + + light.rgb *= gi_probes.data[index].dynamic_range; + if (!gi_probes.data[index].blend_ambient) { + light.a = 1.0; + } + + out_diff += light * blend; + + //radiance + vec4 irr_light = voxel_cone_trace(gi_probe_textures[index], cell_size, position, ref_vec, tan(roughness * 0.5 * M_PI * 0.99), max_distance, gi_probes.data[index].bias); + irr_light.rgb *= gi_probes.data[index].dynamic_range; + if (!gi_probes.data[index].blend_ambient) { + irr_light.a = 1.0; + } + + out_spec += irr_light * blend; + + out_blend += blend; +} + +vec4 fetch_normal_and_roughness(ivec2 pos) { + vec4 normal_roughness = texelFetch(sampler2D(normal_roughness_buffer, linear_sampler), pos, 0); + + normal_roughness.xyz = normalize(normal_roughness.xyz * 2.0 - 1.0); + return normal_roughness; +} + +void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 reflection_light) { + vec4 normal_roughness = fetch_normal_and_roughness(pos); + + vec3 normal = normal_roughness.xyz; + + if (normal.length() > 0.5) { + //valid normal, can do GI + float roughness = normal_roughness.w; + vertex = mat3(params.cam_rotation) * vertex; + normal = normalize(mat3(params.cam_rotation) * normal); + vec3 reflection = normalize(reflect(normalize(vertex), normal)); + +#ifdef USE_SDFGI + sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); +#endif + +#ifdef USE_GIPROBES + { + uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg; + roughness *= roughness; + //find arbitrary tangent and bitangent, then build a matrix + vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); + vec3 tangent = normalize(cross(v0, normal)); + vec3 bitangent = normalize(cross(tangent, normal)); + mat3 normal_mat = mat3(tangent, bitangent, normal); + + vec4 amb_accum = vec4(0.0); + vec4 spec_accum = vec4(0.0); + float blend_accum = 0.0; + + for (uint i = 0; i < params.max_giprobes; i++) { + if (any(equal(uvec2(i), giprobe_tex))) { + gi_probe_compute(i, vertex, normal, reflection, normal_mat, roughness, spec_accum, amb_accum, blend_accum); + } + } + if (blend_accum > 0.0) { + amb_accum /= blend_accum; + spec_accum /= blend_accum; + } + +#ifdef USE_SDFGI + reflection_light = blend_color(spec_accum, reflection_light); + ambient_light = blend_color(amb_accum, ambient_light); +#else + reflection_light = spec_accum; + ambient_light = amb_accum; +#endif + } +#endif + } +} + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + +#ifdef MODE_HALF_RES + pos <<= 1; +#endif + if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing + return; + } + + vec4 ambient_light = vec4(0.0); + vec4 reflection_light = vec4(0.0); + + vec3 vertex = reconstruct_position(pos); + vertex.y = -vertex.y; + + process_gi(pos, vertex, ambient_light, reflection_light); + +#ifdef MODE_HALF_RES + pos >>= 1; +#endif + + imageStore(ambient_buffer, pos, ambient_light); + imageStore(reflection_buffer, pos, reflection_light); +} diff --git a/servers/rendering/renderer_rd/shaders/giprobe.glsl b/servers/rendering/renderer_rd/shaders/giprobe.glsl new file mode 100644 index 0000000000..b931461b31 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/giprobe.glsl @@ -0,0 +1,779 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#ifdef MODE_DYNAMIC +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +#else +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; +#endif + +#ifndef MODE_DYNAMIC + +#define NO_CHILDREN 0xFFFFFFFF +#define GREY_VEC vec3(0.33333, 0.33333, 0.33333) + +struct CellChildren { + uint children[8]; +}; + +layout(set = 0, binding = 1, std430) buffer CellChildrenBuffer { + CellChildren data[]; +} +cell_children; + +struct CellData { + uint position; // xyz 10 bits + uint albedo; //rgb albedo + uint emission; //rgb normalized with e as multiplier + uint normal; //RGB normal encoded +}; + +layout(set = 0, binding = 2, std430) buffer CellDataBuffer { + CellData data[]; +} +cell_data; + +#endif // MODE DYNAMIC + +#define LIGHT_TYPE_DIRECTIONAL 0 +#define LIGHT_TYPE_OMNI 1 +#define LIGHT_TYPE_SPOT 2 + +#if defined(MODE_COMPUTE_LIGHT) || defined(MODE_DYNAMIC_LIGHTING) + +struct Light { + uint type; + float energy; + float radius; + float attenuation; + + vec3 color; + float cos_spot_angle; + + vec3 position; + float inv_spot_attenuation; + + vec3 direction; + bool has_shadow; +}; + +layout(set = 0, binding = 3, std140) uniform Lights { + Light data[MAX_LIGHTS]; +} +lights; + +#endif // MODE COMPUTE LIGHT + +#ifdef MODE_SECOND_BOUNCE + +layout(set = 0, binding = 5) uniform texture3D color_texture; + +#ifdef MODE_ANISOTROPIC +layout(set = 0, binding = 7) uniform texture3D aniso_pos_texture; +layout(set = 0, binding = 8) uniform texture3D aniso_neg_texture; +#endif // MODE ANISOTROPIC + +#endif // MODE_SECOND_BOUNCE + +#ifndef MODE_DYNAMIC + +layout(push_constant, binding = 0, std430) uniform Params { + ivec3 limits; + uint stack_size; + + float emission_scale; + float propagation; + float dynamic_range; + + uint light_count; + uint cell_offset; + uint cell_count; + float aniso_strength; + uint pad; +} +params; + +layout(set = 0, binding = 4, std430) buffer Outputs { + vec4 data[]; +} +outputs; + +#endif // MODE DYNAMIC + +layout(set = 0, binding = 9) uniform texture3D texture_sdf; +layout(set = 0, binding = 10) uniform sampler texture_sampler; + +#ifdef MODE_WRITE_TEXTURE + +layout(rgba8, set = 0, binding = 5) uniform restrict writeonly image3D color_tex; + +#ifdef MODE_ANISOTROPIC + +layout(r16ui, set = 0, binding = 6) uniform restrict writeonly uimage3D aniso_pos_tex; +layout(r16ui, set = 0, binding = 7) uniform restrict writeonly uimage3D aniso_neg_tex; + +#endif + +#endif + +#ifdef MODE_DYNAMIC + +layout(push_constant, binding = 0, std430) uniform Params { + ivec3 limits; + uint light_count; //when not lighting + ivec3 x_dir; + float z_base; + ivec3 y_dir; + float z_sign; + ivec3 z_dir; + float pos_multiplier; + ivec2 rect_pos; + ivec2 rect_size; + ivec2 prev_rect_ofs; + ivec2 prev_rect_size; + bool flip_x; + bool flip_y; + float dynamic_range; + bool on_mipmap; + float propagation; + float pad[3]; +} +params; + +#ifdef MODE_DYNAMIC_LIGHTING + +layout(rgba8, set = 0, binding = 5) uniform restrict readonly image2D source_albedo; +layout(rgba8, set = 0, binding = 6) uniform restrict readonly image2D source_normal; +layout(rgba8, set = 0, binding = 7) uniform restrict readonly image2D source_orm; +//layout (set=0,binding=8) uniform texture2D source_depth; +layout(rgba16f, set = 0, binding = 11) uniform restrict image2D emission; +layout(r32f, set = 0, binding = 12) uniform restrict image2D depth; + +#endif + +#ifdef MODE_DYNAMIC_SHRINK + +layout(rgba16f, set = 0, binding = 5) uniform restrict readonly image2D source_light; +layout(r32f, set = 0, binding = 6) uniform restrict readonly image2D source_depth; + +#ifdef MODE_DYNAMIC_SHRINK_WRITE + +layout(rgba16f, set = 0, binding = 7) uniform restrict writeonly image2D light; +layout(r32f, set = 0, binding = 8) uniform restrict writeonly image2D depth; + +#endif // MODE_DYNAMIC_SHRINK_WRITE + +#ifdef MODE_DYNAMIC_SHRINK_PLOT + +layout(rgba8, set = 0, binding = 11) uniform restrict image3D color_texture; + +#ifdef MODE_ANISOTROPIC + +layout(r16ui, set = 0, binding = 12) uniform restrict writeonly uimage3D aniso_pos_texture; +layout(r16ui, set = 0, binding = 13) uniform restrict writeonly uimage3D aniso_neg_texture; + +#endif // MODE ANISOTROPIC + +#endif //MODE_DYNAMIC_SHRINK_PLOT + +#endif // MODE_DYNAMIC_SHRINK + +//layout (rgba8,set=0,binding=5) uniform restrict writeonly image3D color_tex; + +#endif // MODE DYNAMIC + +#if defined(MODE_COMPUTE_LIGHT) || defined(MODE_DYNAMIC_LIGHTING) + +float raymarch(float distance, float distance_adv, vec3 from, vec3 direction) { + vec3 cell_size = 1.0 / vec3(params.limits); + float occlusion = 1.0; + while (distance > 0.5) { //use this to avoid precision errors + float advance = texture(sampler3D(texture_sdf, texture_sampler), from * cell_size).r * 255.0 - 1.0; + if (advance < 0.0) { + occlusion = 0.0; + break; + } + + occlusion = min(advance, occlusion); + + advance = max(distance_adv, advance - mod(advance, distance_adv)); //should always advance in multiples of distance_adv + + from += direction * advance; + distance -= advance; + } + + return occlusion; //max(0.0,distance); +} + +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 light_pos) { + if (lights.data[light].type == LIGHT_TYPE_DIRECTIONAL) { + light_pos = pos - lights.data[light].direction * length(vec3(params.limits)); + attenuation = 1.0; + + } else { + light_pos = lights.data[light].position; + float distance = length(pos - light_pos); + if (distance >= lights.data[light].radius) { + return false; + } + + attenuation = get_omni_attenuation(distance, 1.0 / lights.data[light].radius, lights.data[light].attenuation); + + if (lights.data[light].type == LIGHT_TYPE_SPOT) { + vec3 rel = normalize(pos - light_pos); + float cos_spot_angle = lights.data[light].cos_spot_angle; + float cos_angle = dot(rel, lights.data[light].direction); + if (cos_angle < cos_spot_angle) { + return false; + } + + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); + } + } + + return true; +} + +float get_normal_advance(vec3 p_normal) { + vec3 normal = p_normal; + vec3 unorm = abs(normal); + + if ((unorm.x >= unorm.y) && (unorm.x >= unorm.z)) { + // x code + unorm = normal.x > 0.0 ? vec3(1.0, 0.0, 0.0) : vec3(-1.0, 0.0, 0.0); + } else if ((unorm.y > unorm.x) && (unorm.y >= unorm.z)) { + // y code + unorm = normal.y > 0.0 ? vec3(0.0, 1.0, 0.0) : vec3(0.0, -1.0, 0.0); + } else if ((unorm.z > unorm.x) && (unorm.z > unorm.y)) { + // z code + unorm = normal.z > 0.0 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 0.0, -1.0); + } else { + // oh-no we messed up code + // has to be + unorm = vec3(1.0, 0.0, 0.0); + } + + return 1.0 / dot(normal, unorm); +} + +void clip_segment(vec4 plane, vec3 begin, inout vec3 end) { + vec3 segment = begin - end; + float den = dot(plane.xyz, segment); + + //printf("den is %i\n",den); + if (den < 0.0001) { + return; + } + + float dist = (dot(plane.xyz, begin) - plane.w) / den; + + if (dist < 0.0001 || dist > 1.0001) { + return; + } + + end = begin + segment * -dist; +} + +bool compute_light_at_pos(uint index, vec3 pos, vec3 normal, inout vec3 light, inout vec3 light_dir) { + float attenuation; + vec3 light_pos; + + if (!compute_light_vector(index, pos, attenuation, light_pos)) { + return false; + } + + light_dir = normalize(pos - light_pos); + + if (attenuation < 0.01 || (length(normal) > 0.2 && dot(normal, light_dir) >= 0)) { + return false; //not facing the light, or attenuation is near zero + } + + if (lights.data[index].has_shadow) { + float distance_adv = get_normal_advance(light_dir); + + vec3 to = pos; + if (length(normal) > 0.2) { + to += normal * distance_adv * 0.51; + } else { + to -= sign(light_dir) * 0.45; //go near the edge towards the light direction to avoid self occlusion + } + + //clip + clip_segment(mix(vec4(-1.0, 0.0, 0.0, 0.0), vec4(1.0, 0.0, 0.0, float(params.limits.x - 1)), bvec4(light_dir.x < 0.0)), to, light_pos); + clip_segment(mix(vec4(0.0, -1.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, float(params.limits.y - 1)), bvec4(light_dir.y < 0.0)), to, light_pos); + clip_segment(mix(vec4(0.0, 0.0, -1.0, 0.0), vec4(0.0, 0.0, 1.0, float(params.limits.z - 1)), bvec4(light_dir.z < 0.0)), to, light_pos); + + float distance = length(to - light_pos); + if (distance < 0.1) { + return false; // hit + } + + distance += distance_adv - mod(distance, distance_adv); //make it reach the center of the box always + light_pos = to - light_dir * distance; + + //from -= sign(light_dir)*0.45; //go near the edge towards the light direction to avoid self occlusion + + /*float dist = raymarch(distance,distance_adv,light_pos,light_dir); + + if (dist > distance_adv) { + return false; + } + + attenuation *= 1.0 - smoothstep(0.1*distance_adv,distance_adv,dist); + */ + + float occlusion = raymarch(distance, distance_adv, light_pos, light_dir); + + if (occlusion == 0.0) { + return false; + } + + attenuation *= occlusion; //1.0 - smoothstep(0.1*distance_adv,distance_adv,dist); + } + + light = lights.data[index].color * attenuation * lights.data[index].energy; + return true; +} + +#endif // MODE COMPUTE LIGHT + +void main() { +#ifndef MODE_DYNAMIC + + uint cell_index = gl_GlobalInvocationID.x; + if (cell_index >= params.cell_count) { + return; + } + cell_index += params.cell_offset; + + uvec3 posu = uvec3(cell_data.data[cell_index].position & 0x7FF, (cell_data.data[cell_index].position >> 11) & 0x3FF, cell_data.data[cell_index].position >> 21); + vec4 albedo = unpackUnorm4x8(cell_data.data[cell_index].albedo); + +#endif + + /////////////////COMPUTE LIGHT/////////////////////////////// + +#ifdef MODE_COMPUTE_LIGHT + + vec3 pos = vec3(posu) + vec3(0.5); + + vec3 emission = vec3(uvec3(cell_data.data[cell_index].emission & 0x1ff, (cell_data.data[cell_index].emission >> 9) & 0x1ff, (cell_data.data[cell_index].emission >> 18) & 0x1ff)) * pow(2.0, float(cell_data.data[cell_index].emission >> 27) - 15.0 - 9.0); + vec3 normal = unpackSnorm4x8(cell_data.data[cell_index].normal).xyz; + +#ifdef MODE_ANISOTROPIC + vec3 accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); + const vec3 accum_dirs[6] = vec3[](vec3(1.0, 0.0, 0.0), vec3(-1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, 0.0, -1.0)); +#else + vec3 accum = vec3(0.0); +#endif + + for (uint i = 0; i < params.light_count; i++) { + vec3 light; + vec3 light_dir; + if (!compute_light_at_pos(i, pos, normal.xyz, light, light_dir)) { + continue; + } + + light *= albedo.rgb; + +#ifdef MODE_ANISOTROPIC + for (uint j = 0; j < 6; j++) { + accum[j] += max(0.0, dot(accum_dirs[j], -light_dir)) * light; + } +#else + if (length(normal) > 0.2) { + accum += max(0.0, dot(normal, -light_dir)) * light; + } else { + //all directions + accum += light; + } +#endif + } + +#ifdef MODE_ANISOTROPIC + + for (uint i = 0; i < 6; i++) { + vec3 light = accum[i]; + if (length(normal) > 0.2) { + light += max(0.0, dot(accum_dirs[i], -normal)) * emission; + } else { + light += emission; + } + + outputs.data[cell_index * 6 + i] = vec4(light, 0.0); + } + +#else + outputs.data[cell_index] = vec4(accum + emission, 0.0); + +#endif + +#endif //MODE_COMPUTE_LIGHT + + /////////////////SECOND BOUNCE/////////////////////////////// + +#ifdef MODE_SECOND_BOUNCE + vec3 pos = vec3(posu) + vec3(0.5); + ivec3 ipos = ivec3(posu); + vec4 normal = unpackSnorm4x8(cell_data.data[cell_index].normal); + +#ifdef MODE_ANISOTROPIC + vec3 accum[6]; + const vec3 accum_dirs[6] = vec3[](vec3(1.0, 0.0, 0.0), vec3(-1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, 0.0, -1.0)); + + /*vec3 src_color = texelFetch(sampler3D(color_texture,texture_sampler),ipos,0).rgb * params.dynamic_range; + vec3 src_aniso_pos = texelFetch(sampler3D(aniso_pos_texture,texture_sampler),ipos,0).rgb; + vec3 src_anisp_neg = texelFetch(sampler3D(anisp_neg_texture,texture_sampler),ipos,0).rgb; + accum[0]=src_col * src_aniso_pos.x; + accum[1]=src_col * src_aniso_neg.x; + accum[2]=src_col * src_aniso_pos.y; + accum[3]=src_col * src_aniso_neg.y; + accum[4]=src_col * src_aniso_pos.z; + accum[5]=src_col * src_aniso_neg.z;*/ + + accum[0] = outputs.data[cell_index * 6 + 0].rgb; + accum[1] = outputs.data[cell_index * 6 + 1].rgb; + accum[2] = outputs.data[cell_index * 6 + 2].rgb; + accum[3] = outputs.data[cell_index * 6 + 3].rgb; + accum[4] = outputs.data[cell_index * 6 + 4].rgb; + accum[5] = outputs.data[cell_index * 6 + 5].rgb; + +#else + vec3 accum = outputs.data[cell_index].rgb; + +#endif + + if (length(normal.xyz) > 0.2) { + vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); + vec3 tangent = normalize(cross(v0, normal.xyz)); + vec3 bitangent = normalize(cross(tangent, normal.xyz)); + mat3 normal_mat = mat3(tangent, bitangent, normal.xyz); + +#define MAX_CONE_DIRS 6 + + vec3 cone_dirs[MAX_CONE_DIRS] = vec3[]( + vec3(0.0, 0.0, 1.0), + vec3(0.866025, 0.0, 0.5), + vec3(0.267617, 0.823639, 0.5), + vec3(-0.700629, 0.509037, 0.5), + vec3(-0.700629, -0.509037, 0.5), + vec3(0.267617, -0.823639, 0.5)); + + float cone_weights[MAX_CONE_DIRS] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); + float tan_half_angle = 0.577; + + for (int i = 0; i < MAX_CONE_DIRS; i++) { + vec3 direction = normal_mat * cone_dirs[i]; + vec4 color = vec4(0.0); + { + float dist = 1.5; + float max_distance = length(vec3(params.limits)); + vec3 cell_size = 1.0 / vec3(params.limits); + +#ifdef MODE_ANISOTROPIC + vec3 aniso_normal = mix(direction, normal.xyz, params.aniso_strength); +#endif + while (dist < max_distance && color.a < 0.95) { + float diameter = max(1.0, 2.0 * tan_half_angle * dist); + vec3 uvw_pos = (pos + dist * direction) * cell_size; + float half_diameter = diameter * 0.5; + //check if outside, then break + //if ( any(greaterThan(abs(uvw_pos - 0.5),vec3(0.5f + half_diameter * cell_size)) ) ) { + // break; + //} + + float log2_diameter = log2(diameter); + vec4 scolor = textureLod(sampler3D(color_texture, texture_sampler), uvw_pos, log2_diameter); +#ifdef MODE_ANISOTROPIC + + vec3 aniso_neg = textureLod(sampler3D(aniso_neg_texture, texture_sampler), uvw_pos, log2_diameter).rgb; + vec3 aniso_pos = textureLod(sampler3D(aniso_pos_texture, texture_sampler), uvw_pos, log2_diameter).rgb; + + scolor.rgb *= dot(max(vec3(0.0), (aniso_normal * aniso_pos)), vec3(1.0)) + dot(max(vec3(0.0), (-aniso_normal * aniso_neg)), vec3(1.0)); +#endif + float a = (1.0 - color.a); + color += a * scolor; + dist += half_diameter; + } + } + color *= cone_weights[i] * vec4(albedo.rgb, 1.0) * params.dynamic_range; //restore range +#ifdef MODE_ANISOTROPIC + for (uint j = 0; j < 6; j++) { + accum[j] += max(0.0, dot(accum_dirs[j], direction)) * color.rgb; + } +#else + accum += color.rgb; +#endif + } + } + +#ifdef MODE_ANISOTROPIC + + outputs.data[cell_index * 6 + 0] = vec4(accum[0], 0.0); + outputs.data[cell_index * 6 + 1] = vec4(accum[1], 0.0); + outputs.data[cell_index * 6 + 2] = vec4(accum[2], 0.0); + outputs.data[cell_index * 6 + 3] = vec4(accum[3], 0.0); + outputs.data[cell_index * 6 + 4] = vec4(accum[4], 0.0); + outputs.data[cell_index * 6 + 5] = vec4(accum[5], 0.0); +#else + outputs.data[cell_index] = vec4(accum, 0.0); + +#endif + +#endif // MODE_SECOND_BOUNCE + + /////////////////UPDATE MIPMAPS/////////////////////////////// + +#ifdef MODE_UPDATE_MIPMAPS + + { +#ifdef MODE_ANISOTROPIC + vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); +#else + vec3 light_accum = vec3(0.0); +#endif + float count = 0.0; + for (uint i = 0; i < 8; i++) { + uint child_index = cell_children.data[cell_index].children[i]; + if (child_index == NO_CHILDREN) { + continue; + } +#ifdef MODE_ANISOTROPIC + light_accum[0] += outputs.data[child_index * 6 + 0].rgb; + light_accum[1] += outputs.data[child_index * 6 + 1].rgb; + light_accum[2] += outputs.data[child_index * 6 + 2].rgb; + light_accum[3] += outputs.data[child_index * 6 + 3].rgb; + light_accum[4] += outputs.data[child_index * 6 + 4].rgb; + light_accum[5] += outputs.data[child_index * 6 + 5].rgb; + +#else + light_accum += outputs.data[child_index].rgb; + +#endif + + count += 1.0; + } + + float divisor = mix(8.0, count, params.propagation); +#ifdef MODE_ANISOTROPIC + outputs.data[cell_index * 6 + 0] = vec4(light_accum[0] / divisor, 0.0); + outputs.data[cell_index * 6 + 1] = vec4(light_accum[1] / divisor, 0.0); + outputs.data[cell_index * 6 + 2] = vec4(light_accum[2] / divisor, 0.0); + outputs.data[cell_index * 6 + 3] = vec4(light_accum[3] / divisor, 0.0); + outputs.data[cell_index * 6 + 4] = vec4(light_accum[4] / divisor, 0.0); + outputs.data[cell_index * 6 + 5] = vec4(light_accum[5] / divisor, 0.0); + +#else + outputs.data[cell_index] = vec4(light_accum / divisor, 0.0); +#endif + } +#endif + + ///////////////////WRITE TEXTURE///////////////////////////// + +#ifdef MODE_WRITE_TEXTURE + { +#ifdef MODE_ANISOTROPIC + vec3 accum_total = vec3(0.0); + accum_total += outputs.data[cell_index * 6 + 0].rgb; + accum_total += outputs.data[cell_index * 6 + 1].rgb; + accum_total += outputs.data[cell_index * 6 + 2].rgb; + accum_total += outputs.data[cell_index * 6 + 3].rgb; + accum_total += outputs.data[cell_index * 6 + 4].rgb; + accum_total += outputs.data[cell_index * 6 + 5].rgb; + + float accum_total_energy = max(dot(accum_total, GREY_VEC), 0.00001); + vec3 iso_positive = vec3(dot(outputs.data[cell_index * 6 + 0].rgb, GREY_VEC), dot(outputs.data[cell_index * 6 + 2].rgb, GREY_VEC), dot(outputs.data[cell_index * 6 + 4].rgb, GREY_VEC)) / vec3(accum_total_energy); + vec3 iso_negative = vec3(dot(outputs.data[cell_index * 6 + 1].rgb, GREY_VEC), dot(outputs.data[cell_index * 6 + 3].rgb, GREY_VEC), dot(outputs.data[cell_index * 6 + 5].rgb, GREY_VEC)) / vec3(accum_total_energy); + + { + uint aniso_pos = uint(clamp(iso_positive.b * 31.0, 0.0, 31.0)); + aniso_pos |= uint(clamp(iso_positive.g * 63.0, 0.0, 63.0)) << 5; + aniso_pos |= uint(clamp(iso_positive.r * 31.0, 0.0, 31.0)) << 11; + imageStore(aniso_pos_tex, ivec3(posu), uvec4(aniso_pos)); + } + + { + uint aniso_neg = uint(clamp(iso_negative.b * 31.0, 0.0, 31.0)); + aniso_neg |= uint(clamp(iso_negative.g * 63.0, 0.0, 63.0)) << 5; + aniso_neg |= uint(clamp(iso_negative.r * 31.0, 0.0, 31.0)) << 11; + imageStore(aniso_neg_tex, ivec3(posu), uvec4(aniso_neg)); + } + + imageStore(color_tex, ivec3(posu), vec4(accum_total / params.dynamic_range, albedo.a)); + +#else + + imageStore(color_tex, ivec3(posu), vec4(outputs.data[cell_index].rgb / params.dynamic_range, albedo.a)); + +#endif + } +#endif + + ///////////////////DYNAMIC LIGHTING///////////////////////////// + +#ifdef MODE_DYNAMIC + + ivec2 pos_xy = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(pos_xy, params.rect_size))) { + return; //out of bounds + } + + ivec2 uv_xy = pos_xy; + if (params.flip_x) { + uv_xy.x = params.rect_size.x - pos_xy.x - 1; + } + if (params.flip_y) { + uv_xy.y = params.rect_size.y - pos_xy.y - 1; + } + +#ifdef MODE_DYNAMIC_LIGHTING + + { + float z = params.z_base + imageLoad(depth, uv_xy).x * params.z_sign; + + ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(z); + + vec3 normal = imageLoad(source_normal, uv_xy).xyz * 2.0 - 1.0; + normal = vec3(params.x_dir) * normal.x * mix(1.0, -1.0, params.flip_x) + vec3(params.y_dir) * normal.y * mix(1.0, -1.0, params.flip_y) - vec3(params.z_dir) * normal.z; + + vec4 albedo = imageLoad(source_albedo, uv_xy); + + //determine the position in space + + vec3 accum = vec3(0.0); + for (uint i = 0; i < params.light_count; i++) { + vec3 light; + vec3 light_dir; + if (!compute_light_at_pos(i, vec3(pos) * params.pos_multiplier, normal, light, light_dir)) { + continue; + } + + light *= albedo.rgb; + + accum += max(0.0, dot(normal, -light_dir)) * light; + } + + accum += imageLoad(emission, uv_xy).xyz; + + imageStore(emission, uv_xy, vec4(accum, albedo.a)); + imageStore(depth, uv_xy, vec4(z)); + } + +#endif // MODE DYNAMIC LIGHTING + +#ifdef MODE_DYNAMIC_SHRINK + + { + vec4 accum = vec4(0.0); + float accum_z = 0.0; + float count = 0.0; + + for (int i = 0; i < 4; i++) { + ivec2 ofs = pos_xy * 2 + ivec2(i & 1, i >> 1) - params.prev_rect_ofs; + if (any(lessThan(ofs, ivec2(0))) || any(greaterThanEqual(ofs, params.prev_rect_size))) { + continue; + } + if (params.flip_x) { + ofs.x = params.prev_rect_size.x - ofs.x - 1; + } + if (params.flip_y) { + ofs.y = params.prev_rect_size.y - ofs.y - 1; + } + + vec4 light = imageLoad(source_light, ofs); + if (light.a == 0.0) { //ignore empty + continue; + } + accum += light; + float z = imageLoad(source_depth, ofs).x; + accum_z += z * 0.5; //shrink half too + count += 1.0; + } + + if (params.on_mipmap) { + accum.rgb /= mix(8.0, count, params.propagation); + accum.a /= 8.0; + } else { + accum /= 4.0; + } + + if (count == 0.0) { + accum_z = 0.0; //avoid nan + } else { + accum_z /= count; + } + +#ifdef MODE_DYNAMIC_SHRINK_WRITE + + imageStore(light, uv_xy, accum); + imageStore(depth, uv_xy, vec4(accum_z)); +#endif + +#ifdef MODE_DYNAMIC_SHRINK_PLOT + + if (accum.a < 0.001) { + return; //do not blit if alpha is too low + } + + ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(accum_z); + + float z_frac = fract(accum_z); + + for (int i = 0; i < 2; i++) { + ivec3 pos3d = pos + abs(params.z_dir) * i; + if (any(lessThan(pos3d, ivec3(0))) || any(greaterThanEqual(pos3d, params.limits))) { + //skip if offlimits + continue; + } + vec4 color_blit = accum * (i == 0 ? 1.0 - z_frac : z_frac); + vec4 color = imageLoad(color_texture, pos3d); + color.rgb *= params.dynamic_range; + +#if 0 + color.rgb = mix(color.rgb,color_blit.rgb,color_blit.a); + color.a+=color_blit.a; +#else + + float sa = 1.0 - color_blit.a; + vec4 result; + result.a = color.a * sa + color_blit.a; + if (result.a == 0.0) { + result = vec4(0.0); + } else { + result.rgb = (color.rgb * color.a * sa + color_blit.rgb * color_blit.a) / result.a; + color = result; + } + +#endif + color.rgb /= params.dynamic_range; + imageStore(color_texture, pos3d, color); + //imageStore(color_texture,pos3d,vec4(1,1,1,1)); + +#ifdef MODE_ANISOTROPIC + //do not care about anisotropy for dynamic objects, just store full lit in all directions + imageStore(aniso_pos_texture, pos3d, uvec4(0xFFFF)); + imageStore(aniso_neg_texture, pos3d, uvec4(0xFFFF)); + +#endif // ANISOTROPIC + } +#endif // MODE_DYNAMIC_SHRINK_PLOT + } +#endif + +#endif // MODE DYNAMIC +} diff --git a/servers/rendering/renderer_rd/shaders/giprobe_debug.glsl b/servers/rendering/renderer_rd/shaders/giprobe_debug.glsl new file mode 100644 index 0000000000..515cc35507 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/giprobe_debug.glsl @@ -0,0 +1,229 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +struct CellData { + uint position; // xyz 10 bits + uint albedo; //rgb albedo + uint emission; //rgb normalized with e as multiplier + uint normal; //RGB normal encoded +}; + +layout(set = 0, binding = 1, std140) buffer CellDataBuffer { + CellData data[]; +} +cell_data; + +layout(set = 0, binding = 2) uniform texture3D color_tex; + +layout(set = 0, binding = 3) uniform sampler tex_sampler; + +#ifdef USE_ANISOTROPY +layout(set = 0, binding = 4) uniform texture3D aniso_pos_tex; +layout(set = 0, binding = 5) uniform texture3D aniso_neg_tex; +#endif + +layout(push_constant, binding = 0, std430) uniform Params { + mat4 projection; + uint cell_offset; + float dynamic_range; + float alpha; + uint level; + ivec3 bounds; + uint pad; +} +params; + +layout(location = 0) out vec4 color_interp; + +void main() { + const vec3 cube_triangles[36] = vec3[]( + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, -1.0f, 1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(-1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(-1.0f, -1.0f, 1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(-1.0f, -1.0f, 1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(-1.0f, 1.0f, -1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(-1.0f, 1.0f, -1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(1.0f, -1.0f, 1.0f)); + + vec3 vertex = cube_triangles[gl_VertexIndex] * 0.5 + 0.5; +#ifdef MODE_DEBUG_LIGHT_FULL + uvec3 posu = uvec3(gl_InstanceIndex % params.bounds.x, (gl_InstanceIndex / params.bounds.x) % params.bounds.y, gl_InstanceIndex / (params.bounds.y * params.bounds.x)); +#else + uint cell_index = gl_InstanceIndex + params.cell_offset; + + uvec3 posu = uvec3(cell_data.data[cell_index].position & 0x7FF, (cell_data.data[cell_index].position >> 11) & 0x3FF, cell_data.data[cell_index].position >> 21); +#endif + +#ifdef MODE_DEBUG_EMISSION + color_interp.xyz = vec3(uvec3(cell_data.data[cell_index].emission & 0x1ff, (cell_data.data[cell_index].emission >> 9) & 0x1ff, (cell_data.data[cell_index].emission >> 18) & 0x1ff)) * pow(2.0, float(cell_data.data[cell_index].emission >> 27) - 15.0 - 9.0); +#endif + +#ifdef MODE_DEBUG_COLOR + color_interp.xyz = unpackUnorm4x8(cell_data.data[cell_index].albedo).xyz; +#endif + +#ifdef MODE_DEBUG_LIGHT + +#ifdef USE_ANISOTROPY + +#define POS_X 0 +#define POS_Y 1 +#define POS_Z 2 +#define NEG_X 3 +#define NEG_Y 4 +#define NEG_Z 5 + + const uint triangle_aniso[12] = uint[]( + NEG_X, + NEG_Z, + NEG_Y, + NEG_Z, + NEG_X, + NEG_Y, + POS_Z, + POS_X, + POS_X, + POS_Y, + POS_Y, + POS_Z); + + color_interp.xyz = texelFetch(sampler3D(color_tex, tex_sampler), ivec3(posu), int(params.level)).xyz * params.dynamic_range; + vec3 aniso_pos = texelFetch(sampler3D(aniso_pos_tex, tex_sampler), ivec3(posu), int(params.level)).xyz; + vec3 aniso_neg = texelFetch(sampler3D(aniso_neg_tex, tex_sampler), ivec3(posu), int(params.level)).xyz; + uint side = triangle_aniso[gl_VertexIndex / 3]; + + float strength = 0.0; + switch (side) { + case POS_X: + strength = aniso_pos.x; + break; + case POS_Y: + strength = aniso_pos.y; + break; + case POS_Z: + strength = aniso_pos.z; + break; + case NEG_X: + strength = aniso_neg.x; + break; + case NEG_Y: + strength = aniso_neg.y; + break; + case NEG_Z: + strength = aniso_neg.z; + break; + } + + color_interp.xyz *= strength; + +#else + color_interp = texelFetch(sampler3D(color_tex, tex_sampler), ivec3(posu), int(params.level)); + color_interp.xyz *params.dynamic_range; + +#endif + +#endif + float scale = (1 << params.level); + + gl_Position = params.projection * vec4((vec3(posu) + vertex) * scale, 1.0); + +#ifdef MODE_DEBUG_LIGHT_FULL + if (color_interp.a == 0.0) { + gl_Position = vec4(0.0); //force clip and not draw + } +#else + color_interp.a = params.alpha; +#endif +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec4 color_interp; +layout(location = 0) out vec4 frag_color; + +void main() { + frag_color = color_interp; + +#ifdef MODE_DEBUG_LIGHT_FULL + + //there really is no alpha, so use dither + + int x = int(gl_FragCoord.x) % 4; + int y = int(gl_FragCoord.y) % 4; + int index = x + y * 4; + float limit = 0.0; + if (x < 8) { + if (index == 0) + limit = 0.0625; + if (index == 1) + limit = 0.5625; + if (index == 2) + limit = 0.1875; + if (index == 3) + limit = 0.6875; + if (index == 4) + limit = 0.8125; + if (index == 5) + limit = 0.3125; + if (index == 6) + limit = 0.9375; + if (index == 7) + limit = 0.4375; + if (index == 8) + limit = 0.25; + if (index == 9) + limit = 0.75; + if (index == 10) + limit = 0.125; + if (index == 11) + limit = 0.625; + if (index == 12) + limit = 1.0; + if (index == 13) + limit = 0.5; + if (index == 14) + limit = 0.875; + if (index == 15) + limit = 0.375; + } + if (frag_color.a < limit) { + discard; + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl b/servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl new file mode 100644 index 0000000000..5b3dec0ee7 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl @@ -0,0 +1,181 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#define MAX_DISTANCE 100000 + +#define NO_CHILDREN 0xFFFFFFFF +#define GREY_VEC vec3(0.33333, 0.33333, 0.33333) + +struct CellChildren { + uint children[8]; +}; + +layout(set = 0, binding = 1, std430) buffer CellChildrenBuffer { + CellChildren data[]; +} +cell_children; + +struct CellData { + uint position; // xyz 10 bits + uint albedo; //rgb albedo + uint emission; //rgb normalized with e as multiplier + uint normal; //RGB normal encoded +}; + +layout(set = 0, binding = 2, std430) buffer CellDataBuffer { + CellData data[]; +} +cell_data; + +layout(r8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D sdf_tex; + +layout(push_constant, binding = 0, std430) uniform Params { + uint offset; + uint end; + uint pad0; + uint pad1; +} +params; + +void main() { + vec3 pos = vec3(gl_GlobalInvocationID); + float closest_dist = 100000.0; + + for (uint i = params.offset; i < params.end; i++) { + vec3 posu = vec3(uvec3(cell_data.data[i].position & 0x7FF, (cell_data.data[i].position >> 11) & 0x3FF, cell_data.data[i].position >> 21)); + float dist = length(pos - posu); + if (dist < closest_dist) { + closest_dist = dist; + } + } + + uint dist_8; + + if (closest_dist < 0.0001) { // same cell + dist_8 = 0; //equals to -1 + } else { + dist_8 = clamp(uint(closest_dist), 0, 254) + 1; //conservative, 0 is 1, so <1 is considered solid + } + + imageStore(sdf_tex, ivec3(gl_GlobalInvocationID), uvec4(dist_8)); + //imageStore(sdf_tex,pos,uvec4(pos*2,0)); +} + +#if 0 +layout(push_constant, binding = 0, std430) uniform Params { + ivec3 limits; + uint stack_size; +} +params; + +float distance_to_aabb(ivec3 pos, ivec3 aabb_pos, ivec3 aabb_size) { + vec3 delta = vec3(max(ivec3(0), max(aabb_pos - pos, pos - (aabb_pos + aabb_size - ivec3(1))))); + return length(delta); +} + +void main() { + ivec3 pos = ivec3(gl_GlobalInvocationID); + + uint stack[10] = uint[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + uint stack_indices[10] = uint[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + ivec3 stack_positions[10] = ivec3[](ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0)); + + const uint cell_orders[8] = uint[]( + 0x11f58d1, + 0xe2e70a, + 0xd47463, + 0xbb829c, + 0x8d11f5, + 0x70ae2e, + 0x463d47, + 0x29cbb8); + + bool cell_found = false; + bool cell_found_exact = false; + ivec3 closest_cell_pos; + float closest_distance = MAX_DISTANCE; + int stack_pos = 0; + + while (true) { + uint index = stack_indices[stack_pos] >> 24; + + if (index == 8) { + //go up + if (stack_pos == 0) { + break; //done going through octree + } + stack_pos--; + continue; + } + + stack_indices[stack_pos] = (stack_indices[stack_pos] & ((1 << 24) - 1)) | ((index + 1) << 24); + + uint cell_index = (stack_indices[stack_pos] >> (index * 3)) & 0x7; + uint child_cell = cell_children.data[stack[stack_pos]].children[cell_index]; + + if (child_cell == NO_CHILDREN) { + continue; + } + + ivec3 child_cell_size = params.limits >> (stack_pos + 1); + ivec3 child_cell_pos = stack_positions[stack_pos]; + + child_cell_pos += mix(ivec3(0), child_cell_size, bvec3(uvec3(index & 1, index & 2, index & 4) != uvec3(0))); + + bool is_leaf = stack_pos == (params.stack_size - 2); + + if (child_cell_pos == pos && is_leaf) { + //we may actually end up in the exact cell. + //if this happens, just abort + cell_found_exact = true; + break; + } + + if (cell_found) { + //discard by distance + float distance = distance_to_aabb(pos, child_cell_pos, child_cell_size); + if (distance >= closest_distance) { + continue; //pointless, just test next child + } else if (is_leaf) { + //closer than what we have AND end of stack, save and continue + closest_cell_pos = child_cell_pos; + closest_distance = distance; + continue; + } + } else if (is_leaf) { + //first solid cell we find, save and continue + closest_distance = distance_to_aabb(pos, child_cell_pos, child_cell_size); + closest_cell_pos = child_cell_pos; + cell_found = true; + continue; + } + + bvec3 direction = greaterThan((pos - (child_cell_pos + (child_cell_size >> 1))), ivec3(0)); + uint cell_order = 0; + cell_order |= mix(0, 1, direction.x); + cell_order |= mix(0, 2, direction.y); + cell_order |= mix(0, 4, direction.z); + + stack[stack_pos + 1] = child_cell; + stack_indices[stack_pos + 1] = cell_orders[cell_order]; //start counting + stack_positions[stack_pos + 1] = child_cell_pos; + stack_pos++; //go up stack + } + + uint dist_8; + + if (cell_found_exact) { + dist_8 = 0; //equals to -1 + } else { + float closest_distance = length(vec3(pos - closest_cell_pos)); + dist_8 = clamp(uint(closest_distance), 0, 254) + 1; //conservative, 0 is 1, so <1 is considered solid + } + + imageStore(sdf_tex, pos, uvec4(dist_8)); +} +#endif diff --git a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl new file mode 100644 index 0000000000..56b3b7ccb4 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl @@ -0,0 +1,323 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#define NO_CHILDREN 0xFFFFFFFF +#define GREY_VEC vec3(0.33333, 0.33333, 0.33333) + +struct CellChildren { + uint children[8]; +}; + +layout(set = 0, binding = 1, std430) buffer CellChildrenBuffer { + CellChildren data[]; +} +cell_children; + +struct CellData { + uint position; // xyz 10 bits + uint albedo; //rgb albedo + uint emission; //rgb normalized with e as multiplier + uint normal; //RGB normal encoded +}; + +layout(set = 0, binding = 2, std430) buffer CellDataBuffer { + CellData data[]; +} +cell_data; + +#define LIGHT_TYPE_DIRECTIONAL 0 +#define LIGHT_TYPE_OMNI 1 +#define LIGHT_TYPE_SPOT 2 + +#ifdef MODE_COMPUTE_LIGHT + +struct Light { + uint type; + float energy; + float radius; + float attenuation; + + vec3 color; + float cos_spot_angle; + + vec3 position; + float inv_spot_attenuation; + + vec3 direction; + bool has_shadow; +}; + +layout(set = 0, binding = 3, std140) uniform Lights { + Light data[MAX_LIGHTS]; +} +lights; + +#endif + +layout(push_constant, binding = 0, std430) uniform Params { + ivec3 limits; + uint stack_size; + + float emission_scale; + float propagation; + float dynamic_range; + + uint light_count; + uint cell_offset; + uint cell_count; + uint pad[2]; +} +params; + +layout(set = 0, binding = 4, std140) uniform Outputs { + vec4 data[]; +} +output; + +#ifdef MODE_COMPUTE_LIGHT + +uint raymarch(float distance, float distance_adv, vec3 from, vec3 direction) { + uint result = NO_CHILDREN; + + ivec3 size = ivec3(max(max(params.limits.x, params.limits.y), params.limits.z)); + + while (distance > -distance_adv) { //use this to avoid precision errors + uint cell = 0; + + ivec3 pos = ivec3(from); + + if (all(greaterThanEqual(pos, ivec3(0))) && all(lessThan(pos, size))) { + ivec3 ofs = ivec3(0); + ivec3 half_size = size / 2; + + for (int i = 0; i < params.stack_size - 1; i++) { + bvec3 greater = greaterThanEqual(pos, ofs + half_size); + + ofs += mix(ivec3(0), half_size, greater); + + uint child = 0; //wonder if this can be done faster + if (greater.x) { + child |= 1; + } + if (greater.y) { + child |= 2; + } + if (greater.z) { + child |= 4; + } + + cell = cell_children.data[cell].children[child]; + if (cell == NO_CHILDREN) { + break; + } + + half_size >>= ivec3(1); + } + + if (cell != NO_CHILDREN) { + return cell; //found cell! + } + } + + from += direction * distance_adv; + distance -= distance_adv; + } + + return NO_CHILDREN; +} + +bool compute_light_vector(uint light, uint cell, vec3 pos, out float attenuation, out vec3 light_pos) { + if (lights.data[light].type == LIGHT_TYPE_DIRECTIONAL) { + light_pos = pos - lights.data[light].direction * length(vec3(params.limits)); + attenuation = 1.0; + } else { + light_pos = lights.data[light].position; + float distance = length(pos - light_pos); + if (distance >= lights.data[light].radius) { + return false; + } + + attenuation = pow(clamp(1.0 - distance / lights.data[light].radius, 0.0001, 1.0), lights.data[light].attenuation); + + if (lights.data[light].type == LIGHT_TYPE_SPOT) { + vec3 rel = normalize(pos - light_pos); + float cos_spot_angle = lights.data[light].cos_spot_angle; + float cos_angle = dot(rel, lights.data[light].direction); + if (cos_angle < cos_spot_angle) { + return false; + } + + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); + } + } + + return true; +} + +float get_normal_advance(vec3 p_normal) { + vec3 normal = p_normal; + vec3 unorm = abs(normal); + + if ((unorm.x >= unorm.y) && (unorm.x >= unorm.z)) { + // x code + unorm = normal.x > 0.0 ? vec3(1.0, 0.0, 0.0) : vec3(-1.0, 0.0, 0.0); + } else if ((unorm.y > unorm.x) && (unorm.y >= unorm.z)) { + // y code + unorm = normal.y > 0.0 ? vec3(0.0, 1.0, 0.0) : vec3(0.0, -1.0, 0.0); + } else if ((unorm.z > unorm.x) && (unorm.z > unorm.y)) { + // z code + unorm = normal.z > 0.0 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 0.0, -1.0); + } else { + // oh-no we messed up code + // has to be + unorm = vec3(1.0, 0.0, 0.0); + } + + return 1.0 / dot(normal, unorm); +} + +#endif + +void main() { + uint cell_index = gl_GlobalInvocationID.x; + if (cell_index >= params.cell_count) { + return; + } + cell_index += params.cell_offset; + + uvec3 posu = uvec3(cell_data.data[cell_index].position & 0x7FF, (cell_data.data[cell_index].position >> 11) & 0x3FF, cell_data.data[cell_index].position >> 21); + vec4 albedo = unpackUnorm4x8(cell_data.data[cell_index].albedo); + +#ifdef MODE_COMPUTE_LIGHT + + vec3 pos = vec3(posu) + vec3(0.5); + + vec3 emission = vec3(ivec3(cell_data.data[cell_index].emission & 0x3FF, (cell_data.data[cell_index].emission >> 10) & 0x7FF, cell_data.data[cell_index].emission >> 21)) * params.emission_scale; + vec4 normal = unpackSnorm4x8(cell_data.data[cell_index].normal); + +#ifdef MODE_ANISOTROPIC + vec3 accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); + const vec3 accum_dirs[6] = vec3[](vec3(1.0, 0.0, 0.0), vec3(-1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, 0.0, -1.0)); +#else + vec3 accum = vec3(0.0); +#endif + + for (uint i = 0; i < params.light_count; i++) { + float attenuation; + vec3 light_pos; + + if (!compute_light_vector(i, cell_index, pos, attenuation, light_pos)) { + continue; + } + + vec3 light_dir = pos - light_pos; + float distance = length(light_dir); + light_dir = normalize(light_dir); + + if (length(normal.xyz) > 0.2 && dot(normal.xyz, light_dir) >= 0) { + continue; //not facing the light + } + + if (lights.data[i].has_shadow) { + float distance_adv = get_normal_advance(light_dir); + + distance += distance_adv - mod(distance, distance_adv); //make it reach the center of the box always + + vec3 from = pos - light_dir * distance; //approximate + from -= sign(light_dir) * 0.45; //go near the edge towards the light direction to avoid self occlusion + + uint result = raymarch(distance, distance_adv, from, light_dir); + + if (result != cell_index) { + continue; //was occluded + } + } + + vec3 light = lights.data[i].color * albedo.rgb * attenuation * lights.data[i].energy; + +#ifdef MODE_ANISOTROPIC + for (uint j = 0; j < 6; j++) { + accum[j] += max(0.0, dot(accum_dir, -light_dir)) * light + emission; + } +#else + if (length(normal.xyz) > 0.2) { + accum += max(0.0, dot(normal.xyz, -light_dir)) * light + emission; + } else { + //all directions + accum += light + emission; + } +#endif + } + +#ifdef MODE_ANISOTROPIC + + output.data[cell_index * 6 + 0] = vec4(accum[0], 0.0); + output.data[cell_index * 6 + 1] = vec4(accum[1], 0.0); + output.data[cell_index * 6 + 2] = vec4(accum[2], 0.0); + output.data[cell_index * 6 + 3] = vec4(accum[3], 0.0); + output.data[cell_index * 6 + 4] = vec4(accum[4], 0.0); + output.data[cell_index * 6 + 5] = vec4(accum[5], 0.0); +#else + output.data[cell_index] = vec4(accum, 0.0); + +#endif + +#endif //MODE_COMPUTE_LIGHT + +#ifdef MODE_UPDATE_MIPMAPS + + { +#ifdef MODE_ANISOTROPIC + vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); +#else + vec3 light_accum = vec3(0.0); +#endif + float count = 0.0; + for (uint i = 0; i < 8; i++) { + uint child_index = cell_children.data[cell_index].children[i]; + if (child_index == NO_CHILDREN) { + continue; + } +#ifdef MODE_ANISOTROPIC + light_accum[1] += output.data[child_index * 6 + 0].rgb; + light_accum[2] += output.data[child_index * 6 + 1].rgb; + light_accum[3] += output.data[child_index * 6 + 2].rgb; + light_accum[4] += output.data[child_index * 6 + 3].rgb; + light_accum[5] += output.data[child_index * 6 + 4].rgb; + light_accum[6] += output.data[child_index * 6 + 5].rgb; + +#else + light_accum += output.data[child_index].rgb; + +#endif + + count += 1.0; + } + + float divisor = mix(8.0, count, params.propagation); +#ifdef MODE_ANISOTROPIC + output.data[cell_index * 6 + 0] = vec4(light_accum[0] / divisor, 0.0); + output.data[cell_index * 6 + 1] = vec4(light_accum[1] / divisor, 0.0); + output.data[cell_index * 6 + 2] = vec4(light_accum[2] / divisor, 0.0); + output.data[cell_index * 6 + 3] = vec4(light_accum[3] / divisor, 0.0); + output.data[cell_index * 6 + 4] = vec4(light_accum[4] / divisor, 0.0); + output.data[cell_index * 6 + 5] = vec4(light_accum[5] / divisor, 0.0); + +#else + output.data[cell_index] = vec4(light_accum / divisor, 0.0); +#endif + } +#endif + +#ifdef MODE_WRITE_TEXTURE + { + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl b/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl new file mode 100644 index 0000000000..8a11c35b78 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl @@ -0,0 +1,82 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#define BLOCK_SIZE 8 + +layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; + +shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; + +#ifdef READ_TEXTURE + +//use for main texture +layout(set = 0, binding = 0) uniform sampler2D source_texture; + +#else + +//use for intermediate textures +layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_luminance; + +#endif + +layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_luminance; + +#ifdef WRITE_LUMINANCE +layout(set = 2, binding = 0) uniform sampler2D prev_luminance; +#endif + +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 source_size; + float max_luminance; + float min_luminance; + float exposure_adjust; + float pad[3]; +} +params; + +void main() { + uint t = gl_LocalInvocationID.y * BLOCK_SIZE + gl_LocalInvocationID.x; + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + + if (any(lessThan(pos, params.source_size))) { +#ifdef READ_TEXTURE + vec3 v = texelFetch(source_texture, pos, 0).rgb; + tmp_data[t] = max(v.r, max(v.g, v.b)); +#else + tmp_data[t] = imageLoad(source_luminance, pos).r; +#endif + } else { + tmp_data[t] = 0.0; + } + + groupMemoryBarrier(); + barrier(); + + uint size = (BLOCK_SIZE * BLOCK_SIZE) >> 1; + + do { + if (t < size) { + tmp_data[t] += tmp_data[t + size]; + } + groupMemoryBarrier(); + barrier(); + + size >>= 1; + } while (size >= 1); + + if (t == 0) { + //compute rect size + ivec2 rect_size = min(params.source_size - pos, ivec2(BLOCK_SIZE)); + float avg = tmp_data[0] / float(rect_size.x * rect_size.y); + //float avg = tmp_data[0] / float(BLOCK_SIZE*BLOCK_SIZE); + pos /= ivec2(BLOCK_SIZE); +#ifdef WRITE_LUMINANCE + float prev_lum = texelFetch(prev_luminance, ivec2(0, 0), 0).r; //1 pixel previous exposure + avg = clamp(prev_lum + (avg - prev_lum) * params.exposure_adjust, params.min_luminance, params.max_luminance); +#endif + imageStore(dest_luminance, pos, vec4(avg)); + } +} diff --git a/servers/rendering/renderer_rd/shaders/particles.glsl b/servers/rendering/renderer_rd/shaders/particles.glsl new file mode 100644 index 0000000000..cb6d8dc7f6 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/particles.glsl @@ -0,0 +1,549 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#define SAMPLER_NEAREST_CLAMP 0 +#define SAMPLER_LINEAR_CLAMP 1 +#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2 +#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5 +#define SAMPLER_NEAREST_REPEAT 6 +#define SAMPLER_LINEAR_REPEAT 7 +#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8 +#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 + +/* SET 0: GLOBAL DATA */ + +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; + +layout(set = 0, binding = 2, std430) restrict readonly buffer GlobalVariableData { + vec4 data[]; +} +global_variables; + +/* Set 1: FRAME AND PARTICLE DATA */ + +// a frame history is kept for trail deterministic behavior + +#define MAX_ATTRACTORS 32 + +#define ATTRACTOR_TYPE_SPHERE 0 +#define ATTRACTOR_TYPE_BOX 1 +#define ATTRACTOR_TYPE_VECTOR_FIELD 2 + +struct Attractor { + mat4 transform; + vec3 extents; //exents or radius + uint type; + uint texture_index; //texture index for vector field + float strength; + float attenuation; + float directionality; +}; + +#define MAX_COLLIDERS 32 + +#define COLLIDER_TYPE_SPHERE 0 +#define COLLIDER_TYPE_BOX 1 +#define COLLIDER_TYPE_SDF 2 +#define COLLIDER_TYPE_HEIGHT_FIELD 3 + +struct Collider { + mat4 transform; + vec3 extents; //exents or radius + uint type; + + uint texture_index; //texture index for vector field + float scale; + uint pad[2]; +}; + +struct FrameParams { + bool emitting; + float system_phase; + float prev_system_phase; + uint cycle; + + float explosiveness; + float randomness; + float time; + float delta; + + uint random_seed; + uint attractor_count; + uint collider_count; + float particle_size; + + mat4 emission_transform; + + Attractor attractors[MAX_ATTRACTORS]; + Collider colliders[MAX_COLLIDERS]; +}; + +layout(set = 1, binding = 0, std430) restrict buffer FrameHistory { + FrameParams data[]; +} +frame_history; + +struct ParticleData { + mat4 xform; + vec3 velocity; + bool is_active; + vec4 color; + vec4 custom; +}; + +layout(set = 1, binding = 1, std430) restrict buffer Particles { + ParticleData data[]; +} +particles; + +#define EMISSION_FLAG_HAS_POSITION 1 +#define EMISSION_FLAG_HAS_ROTATION_SCALE 2 +#define EMISSION_FLAG_HAS_VELOCITY 4 +#define EMISSION_FLAG_HAS_COLOR 8 +#define EMISSION_FLAG_HAS_CUSTOM 16 + +struct ParticleEmission { + mat4 xform; + vec3 velocity; + uint flags; + vec4 color; + vec4 custom; +}; + +layout(set = 1, binding = 2, std430) restrict buffer SourceEmission { + int particle_count; + uint pad0; + uint pad1; + uint pad2; + ParticleEmission data[]; +} +src_particles; + +layout(set = 1, binding = 3, std430) restrict buffer DestEmission { + int particle_count; + int particle_max; + uint pad1; + uint pad2; + ParticleEmission data[]; +} +dst_particles; + +/* SET 2: COLLIDER/ATTRACTOR TEXTURES */ + +#define MAX_3D_TEXTURES 7 + +layout(set = 2, binding = 0) uniform texture3D sdf_vec_textures[MAX_3D_TEXTURES]; +layout(set = 2, binding = 1) uniform texture2D height_field_texture; + +/* SET 3: MATERIAL */ + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = 3, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ +MATERIAL_UNIFORMS + /* clang-format on */ +} material; +#endif + +layout(push_constant, binding = 0, std430) uniform Params { + float lifetime; + bool clear; + uint total_particles; + uint trail_size; + bool use_fractional_delta; + bool sub_emitter_mode; + bool can_emit; + uint pad; +} +params; + +uint hash(uint x) { + x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = (x >> uint(16)) ^ x; + return x; +} + +bool emit_subparticle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) { + if (!params.can_emit) { + return false; + } + + bool valid = false; + + int dst_index = atomicAdd(dst_particles.particle_count, 1); + + if (dst_index >= dst_particles.particle_max) { + atomicAdd(dst_particles.particle_count, -1); + return false; + } + + dst_particles.data[dst_index].xform = p_xform; + dst_particles.data[dst_index].velocity = p_velocity; + dst_particles.data[dst_index].color = p_color; + dst_particles.data[dst_index].custom = p_custom; + dst_particles.data[dst_index].flags = p_flags; + + return true; +} + +/* clang-format off */ + +COMPUTE_SHADER_GLOBALS + +/* clang-format on */ + +void main() { + uint particle = gl_GlobalInvocationID.x; + + if (particle >= params.total_particles * params.trail_size) { + return; //discard + } + + uint index = particle / params.trail_size; + uint frame = (particle % params.trail_size); + +#define FRAME frame_history.data[frame] +#define PARTICLE particles.data[particle] + + bool apply_forces = true; + bool apply_velocity = true; + float local_delta = FRAME.delta; + + float mass = 1.0; + + bool restart = false; + + bool restart_position = false; + bool restart_rotation_scale = false; + bool restart_velocity = false; + bool restart_color = false; + bool restart_custom = false; + + if (params.clear) { + PARTICLE.color = vec4(1.0); + PARTICLE.custom = vec4(0.0); + PARTICLE.velocity = vec3(0.0); + PARTICLE.is_active = false; + PARTICLE.xform = mat4( + vec4(1.0, 0.0, 0.0, 0.0), + vec4(0.0, 1.0, 0.0, 0.0), + vec4(0.0, 0.0, 1.0, 0.0), + vec4(0.0, 0.0, 0.0, 1.0)); + } + + bool collided = false; + vec3 collision_normal = vec3(0.0); + float collision_depth = 0.0; + + vec3 attractor_force = vec3(0.0); + +#if !defined(DISABLE_VELOCITY) + + if (PARTICLE.is_active) { + PARTICLE.xform[3].xyz += PARTICLE.velocity * local_delta; + } +#endif + + /* Process physics if active */ + + if (PARTICLE.is_active) { + for (uint i = 0; i < FRAME.attractor_count; i++) { + vec3 dir; + float amount; + vec3 rel_vec = PARTICLE.xform[3].xyz - FRAME.attractors[i].transform[3].xyz; + vec3 local_pos = rel_vec * mat3(FRAME.attractors[i].transform); + + switch (FRAME.attractors[i].type) { + case ATTRACTOR_TYPE_SPHERE: { + dir = normalize(rel_vec); + float d = length(local_pos) / FRAME.attractors[i].extents.x; + if (d > 1.0) { + continue; + } + amount = max(0.0, 1.0 - d); + } break; + case ATTRACTOR_TYPE_BOX: { + dir = normalize(rel_vec); + + vec3 abs_pos = abs(local_pos / FRAME.attractors[i].extents); + float d = max(abs_pos.x, max(abs_pos.y, abs_pos.z)); + if (d > 1.0) { + continue; + } + amount = max(0.0, 1.0 - d); + + } break; + case ATTRACTOR_TYPE_VECTOR_FIELD: { + vec3 uvw_pos = (local_pos / FRAME.attractors[i].extents) * 2.0 - 1.0; + if (any(lessThan(uvw_pos, vec3(0.0))) || any(greaterThan(uvw_pos, vec3(1.0)))) { + continue; + } + vec3 s = texture(sampler3D(sdf_vec_textures[FRAME.attractors[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos).xyz; + dir = mat3(FRAME.attractors[i].transform) * normalize(s); //revert direction + amount = length(s); + + } break; + } + amount = pow(amount, FRAME.attractors[i].attenuation); + dir = normalize(mix(dir, FRAME.attractors[i].transform[2].xyz, FRAME.attractors[i].directionality)); + attractor_force -= amount * dir * FRAME.attractors[i].strength; + } + + float particle_size = FRAME.particle_size; + +#ifdef USE_COLLISON_SCALE + + particle_size *= dot(vec3(length(PARTICLE.xform[0].xyz), length(PARTICLE.xform[1].xyz), length(PARTICLE.xform[2].xyz)), vec3(0.33333333333)); + +#endif + + for (uint i = 0; i < FRAME.collider_count; i++) { + vec3 normal; + float depth; + bool col = false; + + vec3 rel_vec = PARTICLE.xform[3].xyz - FRAME.colliders[i].transform[3].xyz; + vec3 local_pos = rel_vec * mat3(FRAME.colliders[i].transform); + + switch (FRAME.colliders[i].type) { + case COLLIDER_TYPE_SPHERE: { + float d = length(rel_vec) - (particle_size + FRAME.colliders[i].extents.x); + + if (d < 0.0) { + col = true; + depth = -d; + normal = normalize(rel_vec); + } + + } break; + case COLLIDER_TYPE_BOX: { + vec3 abs_pos = abs(local_pos); + vec3 sgn_pos = sign(local_pos); + + if (any(greaterThan(abs_pos, FRAME.colliders[i].extents))) { + //point outside box + + vec3 closest = min(abs_pos, FRAME.colliders[i].extents); + vec3 rel = abs_pos - closest; + depth = length(rel) - particle_size; + if (depth < 0.0) { + col = true; + normal = mat3(FRAME.colliders[i].transform) * (normalize(rel) * sgn_pos); + depth = -depth; + } + } else { + //point inside box + vec3 axis_len = FRAME.colliders[i].extents - abs_pos; + // there has to be a faster way to do this? + if (all(lessThan(axis_len.xx, axis_len.yz))) { + normal = vec3(1, 0, 0); + } else if (all(lessThan(axis_len.yy, axis_len.xz))) { + normal = vec3(0, 1, 0); + } else { + normal = vec3(0, 0, 1); + } + + col = true; + depth = dot(normal * axis_len, vec3(1)) + particle_size; + normal = mat3(FRAME.colliders[i].transform) * (normal * sgn_pos); + } + + } break; + case COLLIDER_TYPE_SDF: { + vec3 apos = abs(local_pos); + float extra_dist = 0.0; + if (any(greaterThan(apos, FRAME.colliders[i].extents))) { //outside + vec3 mpos = min(apos, FRAME.colliders[i].extents); + extra_dist = distance(mpos, apos); + } + + if (extra_dist > particle_size) { + continue; + } + + vec3 uvw_pos = (local_pos / FRAME.colliders[i].extents) * 0.5 + 0.5; + float s = texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos).r; + s *= FRAME.colliders[i].scale; + s += extra_dist; + if (s < particle_size) { + col = true; + depth = particle_size - s; + const float EPSILON = 0.001; + normal = mat3(FRAME.colliders[i].transform) * + normalize( + vec3( + texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(0.0, 0.0, EPSILON)).r)); + } + + } break; + case COLLIDER_TYPE_HEIGHT_FIELD: { + vec3 local_pos_bottom = local_pos; + local_pos_bottom.y -= particle_size; + + if (any(greaterThan(abs(local_pos_bottom), FRAME.colliders[i].extents))) { + continue; + } + + const float DELTA = 1.0 / 8192.0; + + vec3 uvw_pos = vec3(local_pos_bottom / FRAME.colliders[i].extents) * 0.5 + 0.5; + + float y = 1.0 - texture(sampler2D(height_field_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos.xz).r; + + if (y > uvw_pos.y) { + //inside heightfield + + vec3 pos1 = (vec3(uvw_pos.x, y, uvw_pos.z) * 2.0 - 1.0) * FRAME.colliders[i].extents; + vec3 pos2 = (vec3(uvw_pos.x + DELTA, 1.0 - texture(sampler2D(height_field_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos.xz + vec2(DELTA, 0)).r, uvw_pos.z) * 2.0 - 1.0) * FRAME.colliders[i].extents; + vec3 pos3 = (vec3(uvw_pos.x, 1.0 - texture(sampler2D(height_field_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos.xz + vec2(0, DELTA)).r, uvw_pos.z + DELTA) * 2.0 - 1.0) * FRAME.colliders[i].extents; + + normal = normalize(cross(pos1 - pos2, pos1 - pos3)); + float local_y = (vec3(local_pos / FRAME.colliders[i].extents) * 0.5 + 0.5).y; + + col = true; + depth = dot(normal, pos1) - dot(normal, local_pos_bottom); + } + + } break; + } + + if (col) { + if (!collided) { + collided = true; + collision_normal = normal; + collision_depth = depth; + } else { + vec3 c = collision_normal * collision_depth; + c += normal * max(0.0, depth - dot(normal, c)); + collision_normal = normalize(c); + collision_depth = length(c); + } + } + } + } + + if (params.sub_emitter_mode) { + if (!PARTICLE.is_active) { + int src_index = atomicAdd(src_particles.particle_count, -1) - 1; + + if (src_index >= 0) { + PARTICLE.is_active = true; + restart = true; + + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_POSITION)) { + PARTICLE.xform[3] = src_particles.data[src_index].xform[3]; + } else { + PARTICLE.xform[3] = vec4(0, 0, 0, 1); + restart_position = true; + } + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_ROTATION_SCALE)) { + PARTICLE.xform[0] = src_particles.data[src_index].xform[0]; + PARTICLE.xform[1] = src_particles.data[src_index].xform[1]; + PARTICLE.xform[2] = src_particles.data[src_index].xform[2]; + } else { + PARTICLE.xform[0] = vec4(1, 0, 0, 0); + PARTICLE.xform[1] = vec4(0, 1, 0, 0); + PARTICLE.xform[2] = vec4(0, 0, 1, 0); + restart_rotation_scale = true; + } + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_VELOCITY)) { + PARTICLE.velocity = src_particles.data[src_index].velocity; + } else { + PARTICLE.velocity = vec3(0); + restart_velocity = true; + } + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_COLOR)) { + PARTICLE.color = src_particles.data[src_index].color; + } else { + PARTICLE.color = vec4(1); + restart_color = true; + } + + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_CUSTOM)) { + PARTICLE.custom = src_particles.data[src_index].custom; + } else { + PARTICLE.custom = vec4(0); + restart_custom = true; + } + } + } + + } else if (FRAME.emitting) { + float restart_phase = float(index) / float(params.total_particles); + + if (FRAME.randomness > 0.0) { + uint seed = FRAME.cycle; + if (restart_phase >= FRAME.system_phase) { + seed -= uint(1); + } + seed *= uint(params.total_particles); + seed += uint(index); + float random = float(hash(seed) % uint(65536)) / 65536.0; + restart_phase += FRAME.randomness * random * 1.0 / float(params.total_particles); + } + + restart_phase *= (1.0 - FRAME.explosiveness); + + if (FRAME.system_phase > FRAME.prev_system_phase) { + // restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed + + if (restart_phase >= FRAME.prev_system_phase && restart_phase < FRAME.system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (FRAME.system_phase - restart_phase) * params.lifetime; + } + } + + } else if (FRAME.delta > 0.0) { + if (restart_phase >= FRAME.prev_system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (1.0 - restart_phase + FRAME.system_phase) * params.lifetime; + } + + } else if (restart_phase < FRAME.system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (FRAME.system_phase - restart_phase) * params.lifetime; + } + } + } + + uint current_cycle = FRAME.cycle; + + if (FRAME.system_phase < restart_phase) { + current_cycle -= uint(1); + } + + uint particle_number = current_cycle * uint(params.total_particles) + particle; + + if (restart) { + PARTICLE.is_active = FRAME.emitting; + restart_position = true; + restart_rotation_scale = true; + restart_velocity = true; + restart_color = true; + restart_custom = true; + } + } + + if (PARTICLE.is_active) { + /* clang-format off */ + +COMPUTE_SHADER_CODE + + /* clang-format on */ + } +} diff --git a/servers/rendering/renderer_rd/shaders/particles_copy.glsl b/servers/rendering/renderer_rd/shaders/particles_copy.glsl new file mode 100644 index 0000000000..6c782b6045 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/particles_copy.glsl @@ -0,0 +1,82 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +struct ParticleData { + mat4 xform; + vec3 velocity; + bool is_active; + vec4 color; + vec4 custom; +}; + +layout(set = 0, binding = 1, std430) restrict readonly buffer Particles { + ParticleData data[]; +} +particles; + +layout(set = 0, binding = 2, std430) restrict writeonly buffer Transforms { + vec4 data[]; +} +instances; + +#ifdef USE_SORT_BUFFER + +layout(set = 1, binding = 0, std430) restrict buffer SortBuffer { + vec2 data[]; +} +sort_buffer; + +#endif // USE_SORT_BUFFER + +layout(push_constant, binding = 0, std430) uniform Params { + vec3 sort_direction; + uint total_particles; +} +params; + +void main() { +#ifdef MODE_FILL_SORT_BUFFER + + uint particle = gl_GlobalInvocationID.x; + if (particle >= params.total_particles) { + return; //discard + } + + sort_buffer.data[particle].x = dot(params.sort_direction, particles.data[particle].xform[3].xyz); + sort_buffer.data[particle].y = float(particle); +#endif + +#ifdef MODE_FILL_INSTANCES + + uint particle = gl_GlobalInvocationID.x; + uint write_offset = gl_GlobalInvocationID.x * (3 + 1 + 1); //xform + color + custom + + if (particle >= params.total_particles) { + return; //discard + } + +#ifdef USE_SORT_BUFFER + particle = uint(sort_buffer.data[particle].y); //use index from sort buffer +#endif + + mat4 txform; + + if (particles.data[particle].is_active) { + txform = transpose(particles.data[particle].xform); + } else { + txform = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); //zero scale, becomes invisible + } + + instances.data[write_offset + 0] = txform[0]; + instances.data[write_offset + 1] = txform[1]; + instances.data[write_offset + 2] = txform[2]; + instances.data[write_offset + 3] = particles.data[particle].color; + instances.data[write_offset + 4] = particles.data[particle].custom; + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/resolve.glsl b/servers/rendering/renderer_rd/shaders/resolve.glsl new file mode 100644 index 0000000000..e83c4ca93b --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/resolve.glsl @@ -0,0 +1,220 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#ifdef MODE_RESOLVE_GI +layout(set = 0, binding = 0) uniform sampler2DMS source_depth; +layout(set = 0, binding = 1) uniform sampler2DMS source_normal_roughness; + +layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_depth; +layout(rgba8, set = 1, binding = 1) uniform restrict writeonly image2D dest_normal_roughness; + +#ifdef GIPROBE_RESOLVE +layout(set = 2, binding = 0) uniform usampler2DMS source_giprobe; +layout(rg8ui, set = 3, binding = 0) uniform restrict writeonly uimage2D dest_giprobe; +#endif + +#endif + +layout(push_constant, binding = 16, std430) uniform Params { + ivec2 screen_size; + int sample_count; + uint pad; +} +params; + +void main() { + // Pixel being shaded + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing + return; + } + +#ifdef MODE_RESOLVE_GI + + float best_depth = 1e20; + vec4 best_normal_roughness = vec4(0.0); +#ifdef GIPROBE_RESOLVE + uvec2 best_giprobe; +#endif + +#if 0 + + for(int i=0;i<params.sample_count;i++) { + float depth = texelFetch(source_depth,pos,i).r; + if (depth < best_depth) { //use the depth closest to camera + best_depth = depth; + best_normal_roughness = texelFetch(source_normal_roughness,pos,i); + +#ifdef GIPROBE_RESOLVE + best_giprobe = texelFetch(source_giprobe,pos,i).rg; +#endif + } + } + +#else + +#if 1 + + vec4 group1; + vec4 group2; + vec4 group3; + vec4 group4; + int best_index = 0; + + //2X + group1.x = texelFetch(source_depth, pos, 0).r; + group1.y = texelFetch(source_depth, pos, 1).r; + + //4X + if (params.sample_count >= 4) { + group1.z = texelFetch(source_depth, pos, 2).r; + group1.w = texelFetch(source_depth, pos, 3).r; + } + //8X + if (params.sample_count >= 8) { + group2.x = texelFetch(source_depth, pos, 4).r; + group2.y = texelFetch(source_depth, pos, 5).r; + group2.z = texelFetch(source_depth, pos, 6).r; + group2.w = texelFetch(source_depth, pos, 7).r; + } + //16X + if (params.sample_count >= 16) { + group3.x = texelFetch(source_depth, pos, 8).r; + group3.y = texelFetch(source_depth, pos, 9).r; + group3.z = texelFetch(source_depth, pos, 10).r; + group3.w = texelFetch(source_depth, pos, 11).r; + + group4.x = texelFetch(source_depth, pos, 12).r; + group4.y = texelFetch(source_depth, pos, 13).r; + group4.z = texelFetch(source_depth, pos, 14).r; + group4.w = texelFetch(source_depth, pos, 15).r; + } + + if (params.sample_count == 2) { + best_index = (pos.x & 1) ^ ((pos.y >> 1) & 1); //not much can be done here + } else if (params.sample_count == 4) { + vec4 freq = vec4(equal(group1, vec4(group1.x))); + freq += vec4(equal(group1, vec4(group1.y))); + freq += vec4(equal(group1, vec4(group1.z))); + freq += vec4(equal(group1, vec4(group1.w))); + + float min_f = freq.x; + best_index = 0; + if (freq.y < min_f) { + best_index = 1; + min_f = freq.y; + } + if (freq.z < min_f) { + best_index = 2; + min_f = freq.z; + } + if (freq.w < min_f) { + best_index = 3; + } + } else if (params.sample_count == 8) { + vec4 freq0 = vec4(equal(group1, vec4(group1.x))); + vec4 freq1 = vec4(equal(group2, vec4(group1.x))); + freq0 += vec4(equal(group1, vec4(group1.y))); + freq1 += vec4(equal(group2, vec4(group1.y))); + freq0 += vec4(equal(group1, vec4(group1.z))); + freq1 += vec4(equal(group2, vec4(group1.z))); + freq0 += vec4(equal(group1, vec4(group1.w))); + freq1 += vec4(equal(group2, vec4(group1.w))); + freq0 += vec4(equal(group1, vec4(group2.x))); + freq1 += vec4(equal(group2, vec4(group2.x))); + freq0 += vec4(equal(group1, vec4(group2.y))); + freq1 += vec4(equal(group2, vec4(group2.y))); + freq0 += vec4(equal(group1, vec4(group2.z))); + freq1 += vec4(equal(group2, vec4(group2.z))); + freq0 += vec4(equal(group1, vec4(group2.w))); + freq1 += vec4(equal(group2, vec4(group2.w))); + + float min_f0 = freq0.x; + int best_index0 = 0; + if (freq0.y < min_f0) { + best_index0 = 1; + min_f0 = freq0.y; + } + if (freq0.z < min_f0) { + best_index0 = 2; + min_f0 = freq0.z; + } + if (freq0.w < min_f0) { + best_index0 = 3; + min_f0 = freq0.w; + } + + float min_f1 = freq1.x; + int best_index1 = 4; + if (freq1.y < min_f1) { + best_index1 = 5; + min_f1 = freq1.y; + } + if (freq1.z < min_f1) { + best_index1 = 6; + min_f1 = freq1.z; + } + if (freq1.w < min_f1) { + best_index1 = 7; + min_f1 = freq1.w; + } + + best_index = mix(best_index0, best_index1, min_f0 < min_f1); + } + +#else + float depths[16]; + int depth_indices[16]; + int depth_amount[16]; + int depth_count = 0; + + for (int i = 0; i < params.sample_count; i++) { + float depth = texelFetch(source_depth, pos, i).r; + int depth_index = -1; + for (int j = 0; j < depth_count; j++) { + if (abs(depths[j] - depth) < 0.000001) { + depth_index = j; + break; + } + } + + if (depth_index == -1) { + depths[depth_count] = depth; + depth_indices[depth_count] = i; + depth_amount[depth_count] = 1; + depth_count += 1; + } else { + depth_amount[depth_index] += 1; + } + } + + int depth_least = 0xFFFF; + int best_index = 0; + for (int j = 0; j < depth_count; j++) { + if (depth_amount[j] < depth_least) { + best_index = depth_indices[j]; + depth_least = depth_amount[j]; + } + } +#endif + best_depth = texelFetch(source_depth, pos, best_index).r; + best_normal_roughness = texelFetch(source_normal_roughness, pos, best_index); +#ifdef GIPROBE_RESOLVE + best_giprobe = texelFetch(source_giprobe, pos, best_index).rg; +#endif + +#endif + + imageStore(dest_depth, pos, vec4(best_depth)); + imageStore(dest_normal_roughness, pos, vec4(best_normal_roughness)); +#ifdef GIPROBE_RESOLVE + imageStore(dest_giprobe, pos, uvec4(best_giprobe, 0, 0)); +#endif + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl b/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl new file mode 100644 index 0000000000..464895928a --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl @@ -0,0 +1,70 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2D source_normal; +layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_roughness; + +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 screen_size; + float curve; + uint pad; +} +params; + +#define HALF_PI 1.5707963267948966 + +void main() { + // Pixel being shaded + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThan(pos, params.screen_size))) { //too large, do nothing + return; + } + + vec3 normal_accum = vec3(0.0); + float accum = 0.0; + for (int i = 0; i <= 1; i++) { + for (int j = 0; j <= 1; j++) { + normal_accum += normalize(texelFetch(source_normal, pos + ivec2(i, j), 0).xyz * 2.0 - 1.0); + accum += 1.0; + } + } + + normal_accum /= accum; + + float r = length(normal_accum); + + float limit; + + if (r < 1.0) { + float threshold = 0.4; + + /* + //Formula from Filament, does not make sense to me. + + float r2 = r * r; + float kappa = (3.0f * r - r * r2) / (1.0f - r2); + float variance = 0.25f / kappa; + limit = sqrt(min(2.0f * variance, threshold * threshold)); + */ + /* + //Formula based on probability distribution graph + + float width = acos(max(0.0,r)); // convert to angle (width) + float roughness = pow(width,1.7)*0.854492; //approximate (crappy) formula to convert to roughness + limit = min(sqrt(roughness), threshold); //convert to perceptual roughness and apply threshold + */ + + limit = min(sqrt(pow(acos(max(0.0, r)) / HALF_PI, params.curve)), threshold); //convert to perceptual roughness and apply threshold + + //limit = 0.5; + } else { + limit = 0.0; + } + + imageStore(dest_roughness, pos, vec4(limit)); +} diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl new file mode 100644 index 0000000000..1cea9bf8db --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -0,0 +1,3293 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +#include "scene_forward_inc.glsl" + +/* INPUT ATTRIBS */ + +layout(location = 0) in vec3 vertex_attrib; + +//only for pure render depth when normal is not used + +#ifdef NORMAL_USED +layout(location = 1) in vec3 normal_attrib; +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) +layout(location = 2) in vec4 tangent_attrib; +#endif + +#if defined(COLOR_USED) +layout(location = 3) in vec4 color_attrib; +#endif + +#ifdef UV_USED +layout(location = 4) in vec2 uv_attrib; +#endif + +#if defined(UV2_USED) || defined(USE_LIGHTMAP) || defined(MODE_RENDER_MATERIAL) +layout(location = 5) in vec2 uv2_attrib; +#endif + +#if defined(CUSTOM0_USED) +layout(location = 6) in vec4 custom0_attrib; +#endif + +#if defined(CUSTOM1_USED) +layout(location = 7) in vec4 custom1_attrib; +#endif + +#if defined(CUSTOM2_USED) +layout(location = 8) in vec4 custom2_attrib; +#endif + +#if defined(CUSTOM3_USED) +layout(location = 9) in vec4 custom3_attrib; +#endif + +#if defined(BONES_USED) +layout(location = 10) in uvec4 bone_attrib; +#endif + +#if defined(WEIGHTS_USED) +layout(location = 11) in vec4 weight_attrib; +#endif + +/* Varyings */ + +layout(location = 0) out vec3 vertex_interp; + +#ifdef NORMAL_USED +layout(location = 1) out vec3 normal_interp; +#endif + +#if defined(COLOR_USED) +layout(location = 2) out vec4 color_interp; +#endif + +#ifdef UV_USED +layout(location = 3) out vec2 uv_interp; +#endif + +#if defined(UV2_USED) || defined(USE_LIGHTMAP) +layout(location = 4) out vec2 uv2_interp; +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) +layout(location = 5) out vec3 tangent_interp; +layout(location = 6) out vec3 binormal_interp; +#endif + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ +MATERIAL_UNIFORMS + /* clang-format on */ +} material; +#endif + +invariant gl_Position; + +#ifdef MODE_DUAL_PARABOLOID + +layout(location = 8) out float dp_clip; + +#endif + +layout(location = 9) out flat uint instance_index; + +/* clang-format off */ + +VERTEX_SHADER_GLOBALS + +/* clang-format on */ + +void main() { + vec4 instance_custom = vec4(0.0); +#if defined(COLOR_USED) + color_interp = color_attrib; +#endif + + instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + + mat4 world_matrix = instances.data[instance_index].transform; + + mat3 world_normal_matrix; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { + world_normal_matrix = inverse(mat3(world_matrix)); + } else { + world_normal_matrix = mat3(world_matrix); + } + + if (is_multimesh) { + //multimesh, instances are for it + uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; + offset *= gl_InstanceIndex; + + mat4 matrix; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { + matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + offset += 2; + } else { + matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], transforms.data[offset + 2], vec4(0.0, 0.0, 0.0, 1.0)); + offset += 3; + } + + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { +#ifdef COLOR_USED + color_interp *= transforms.data[offset]; +#endif + offset += 1; + } + + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { + instance_custom = transforms.data[offset]; + } + + //transpose + matrix = transpose(matrix); + world_matrix = world_matrix * matrix; + world_normal_matrix = world_normal_matrix * mat3(matrix); + } + + vec3 vertex = vertex_attrib; +#ifdef NORMAL_USED + vec3 normal = normal_attrib * 2.0 - 1.0; +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + vec3 tangent = tangent_attrib.xyz * 2.0 - 1.0; + float binormalf = tangent_attrib.a * 2.0 - 1.0; + vec3 binormal = normalize(cross(normal, tangent) * binormalf); +#endif + +#if 0 + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) { + //multimesh, instances are for it + + uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3; + uvec2 bones_23 = uvec2(bone_attrib.y & 0xFFFF, bone_attrib.y >> 16) * 3; + vec2 weights_01 = unpackUnorm2x16(bone_attrib.z); + vec2 weights_23 = unpackUnorm2x16(bone_attrib.w); + + mat4 m = mat4(transforms.data[bones_01.x], transforms.data[bones_01.x + 1], transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x; + m += mat4(transforms.data[bones_01.y], transforms.data[bones_01.y + 1], transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y; + m += mat4(transforms.data[bones_23.x], transforms.data[bones_23.x + 1], transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x; + m += mat4(transforms.data[bones_23.y], transforms.data[bones_23.y + 1], transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y; + + //reverse order because its transposed + vertex = (vec4(vertex, 1.0) * m).xyz; + normal = (vec4(normal, 0.0) * m).xyz; + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + + tangent = (vec4(tangent, 0.0) * m).xyz; + binormal = (vec4(binormal, 0.0) * m).xyz; +#endif + } +#endif + +#ifdef UV_USED + uv_interp = uv_attrib; +#endif + +#if defined(UV2_USED) || defined(USE_LIGHTMAP) + uv2_interp = uv2_attrib; +#endif + +#ifdef OVERRIDE_POSITION + vec4 position; +#endif + + mat4 projection_matrix = scene_data.projection_matrix; + +//using world coordinates +#if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) + + vertex = (world_matrix * vec4(vertex, 1.0)).xyz; + + normal = world_normal_matrix * normal; + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + + tangent = world_normal_matrix * tangent; + binormal = world_normal_matrix * binormal; + +#endif +#endif + + float roughness = 1.0; + + mat4 modelview = scene_data.inv_camera_matrix * world_matrix; + mat3 modelview_normal = mat3(scene_data.inv_camera_matrix) * world_normal_matrix; + + { + /* clang-format off */ + +VERTEX_SHADER_CODE + + /* clang-format on */ + } + +// using local coordinates (default) +#if !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED) + + vertex = (modelview * vec4(vertex, 1.0)).xyz; +#ifdef NORMAL_USED + normal = modelview_normal * normal; +#endif + +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + + binormal = modelview_normal * binormal; + tangent = modelview_normal * tangent; +#endif + +//using world coordinates +#if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) + + vertex = (scene_data.inv_camera_matrix * vec4(vertex, 1.0)).xyz; + normal = mat3(scene_data.inverse_normal_matrix) * normal; + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + + binormal = mat3(scene_data.camera_inverse_binormal_matrix) * binormal; + tangent = mat3(scene_data.camera_inverse_tangent_matrix) * tangent; +#endif +#endif + + vertex_interp = vertex; +#ifdef NORMAL_USED + normal_interp = normal; +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + tangent_interp = tangent; + binormal_interp = binormal; +#endif + +#ifdef MODE_RENDER_DEPTH + +#ifdef MODE_DUAL_PARABOLOID + + vertex_interp.z *= scene_data.dual_paraboloid_side; + + dp_clip = vertex_interp.z; //this attempts to avoid noise caused by objects sent to the other parabolloid side due to bias + + //for dual paraboloid shadow mapping, this is the fastest but least correct way, as it curves straight edges + + vec3 vtx = vertex_interp; + float distance = length(vtx); + vtx = normalize(vtx); + vtx.xy /= 1.0 - vtx.z; + vtx.z = (distance / scene_data.z_far); + vtx.z = vtx.z * 2.0 - 1.0; + vertex_interp = vtx; + +#endif + +#endif //MODE_RENDER_DEPTH + +#ifdef OVERRIDE_POSITION + gl_Position = position; +#else + gl_Position = projection_matrix * vec4(vertex_interp, 1.0); +#endif + +#ifdef MODE_RENDER_DEPTH + if (scene_data.pancake_shadows) { + if (gl_Position.z <= 0.00001) { + gl_Position.z = 0.00001; + } + } +#endif +#ifdef MODE_RENDER_MATERIAL + if (scene_data.material_uv2_mode) { + vec2 uv_offset = unpackHalf2x16(draw_call.uv_offset); + gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0; + gl_Position.z = 0.00001; + gl_Position.w = 1.0; + } +#endif +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +#include "scene_forward_inc.glsl" + +/* Varyings */ + +layout(location = 0) in vec3 vertex_interp; + +#ifdef NORMAL_USED +layout(location = 1) in vec3 normal_interp; +#endif + +#if defined(COLOR_USED) +layout(location = 2) in vec4 color_interp; +#endif + +#ifdef UV_USED +layout(location = 3) in vec2 uv_interp; +#endif + +#if defined(UV2_USED) || defined(USE_LIGHTMAP) +layout(location = 4) in vec2 uv2_interp; +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) +layout(location = 5) in vec3 tangent_interp; +layout(location = 6) in vec3 binormal_interp; +#endif + +#ifdef MODE_DUAL_PARABOLOID + +layout(location = 8) in float dp_clip; + +#endif + +layout(location = 9) in flat uint instance_index; + +//defines to keep compatibility with vertex + +#define world_matrix instances.data[instance_index].transform +#define projection_matrix scene_data.projection_matrix + +#if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) +//both required for transmittance to be enabled +#define LIGHT_TRANSMITTANCE_USED +#endif + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ +MATERIAL_UNIFORMS + /* clang-format on */ +} material; +#endif + +/* clang-format off */ + +FRAGMENT_SHADER_GLOBALS + +/* clang-format on */ + +#ifdef MODE_RENDER_DEPTH + +#ifdef MODE_RENDER_MATERIAL + +layout(location = 0) out vec4 albedo_output_buffer; +layout(location = 1) out vec4 normal_output_buffer; +layout(location = 2) out vec4 orm_output_buffer; +layout(location = 3) out vec4 emission_output_buffer; +layout(location = 4) out float depth_output_buffer; + +#endif + +#ifdef MODE_RENDER_NORMAL_ROUGHNESS +layout(location = 0) out vec4 normal_roughness_output_buffer; + +#ifdef MODE_RENDER_GIPROBE +layout(location = 1) out uvec2 giprobe_buffer; +#endif + +#endif //MODE_RENDER_NORMAL +#else // RENDER DEPTH + +#ifdef MODE_MULTIPLE_RENDER_TARGETS + +layout(location = 0) out vec4 diffuse_buffer; //diffuse (rgb) and roughness +layout(location = 1) out vec4 specular_buffer; //specular and SSS (subsurface scatter) +#else + +layout(location = 0) out vec4 frag_color; +#endif + +#endif // RENDER DEPTH + +#ifdef ALPHA_HASH_USED + +float hash_2d(vec2 p) { + return fract(1.0e4 * sin(17.0 * p.x + 0.1 * p.y) * + (0.1 + abs(sin(13.0 * p.y + p.x)))); +} + +float hash_3d(vec3 p) { + return hash_2d(vec2(hash_2d(p.xy), p.z)); +} + +float compute_alpha_hash_threshold(vec3 pos, float hash_scale) { + vec3 dx = dFdx(pos); + vec3 dy = dFdx(pos); + float delta_max_sqr = max(length(dx), length(dy)); + float pix_scale = 1.0 / (hash_scale * delta_max_sqr); + + vec2 pix_scales = + vec2(exp2(floor(log2(pix_scale))), exp2(ceil(log2(pix_scale)))); + + vec2 a_thresh = vec2(hash_3d(floor(pix_scales.x * pos.xyz)), + hash_3d(floor(pix_scales.y * pos.xyz))); + + float lerp_factor = fract(log2(pix_scale)); + + float a_interp = (1.0 - lerp_factor) * a_thresh.x + lerp_factor * a_thresh.y; + + float min_lerp = min(lerp_factor, 1.0 - lerp_factor); + + vec3 cases = vec3(a_interp * a_interp / (2.0 * min_lerp * (1.0 - min_lerp)), + (a_interp - 0.5 * min_lerp) / (1.0 - min_lerp), + 1.0 - ((1.0 - a_interp) * (1.0 - a_interp) / + (2.0 * min_lerp * (1.0 - min_lerp)))); + + float alpha_hash_threshold = + (lerp_factor < (1.0 - min_lerp)) ? ((lerp_factor < min_lerp) ? cases.x : cases.y) : cases.z; + + return clamp(alpha_hash_threshold, 0.0, 1.0); +} + +#endif // ALPHA_HASH_USED + +#ifdef ALPHA_ANTIALIASING_EDGE_USED + +float calc_mip_level(vec2 texture_coord) { + vec2 dx = dFdx(texture_coord); + vec2 dy = dFdy(texture_coord); + float delta_max_sqr = max(dot(dx, dx), dot(dy, dy)); + return max(0.0, 0.5 * log2(delta_max_sqr)); +} + +float compute_alpha_antialiasing_edge(float input_alpha, vec2 texture_coord, float alpha_edge) { + input_alpha *= 1.0 + max(0, calc_mip_level(texture_coord)) * 0.25; // 0.25 mip scale, magic number + input_alpha = (input_alpha - alpha_edge) / max(fwidth(input_alpha), 0.0001) + 0.5; + return clamp(input_alpha, 0.0, 1.0); +} + +#endif // ALPHA_ANTIALIASING_USED + +// This returns the G_GGX function divided by 2 cos_theta_m, where in practice cos_theta_m is either N.L or N.V. +// We're dividing this factor off because the overall term we'll end up looks like +// (see, for example, the first unnumbered equation in B. Burley, "Physically Based Shading at Disney", SIGGRAPH 2012): +// +// F(L.V) D(N.H) G(N.L) G(N.V) / (4 N.L N.V) +// +// We're basically regouping this as +// +// F(L.V) D(N.H) [G(N.L)/(2 N.L)] [G(N.V) / (2 N.V)] +// +// and thus, this function implements the [G(N.m)/(2 N.m)] part with m = L or V. +// +// The contents of the D and G (G1) functions (GGX) are taken from +// E. Heitz, "Understanding the Masking-Shadowing Function in Microfacet-Based BRDFs", J. Comp. Graph. Tech. 3 (2) (2014). +// Eqns 71-72 and 85-86 (see also Eqns 43 and 80). + +#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + +float G_GGX_2cos(float cos_theta_m, float alpha) { + // Schlick's approximation + // C. Schlick, "An Inexpensive BRDF Model for Physically-based Rendering", Computer Graphics Forum. 13 (3): 233 (1994) + // Eq. (19), although see Heitz (2014) the about the problems with his derivation. + // It nevertheless approximates GGX well with k = alpha/2. + float k = 0.5 * alpha; + return 0.5 / (cos_theta_m * (1.0 - k) + k); + + // float cos2 = cos_theta_m * cos_theta_m; + // float sin2 = (1.0 - cos2); + // return 1.0 / (cos_theta_m + sqrt(cos2 + alpha * alpha * sin2)); +} + +float D_GGX(float cos_theta_m, float alpha) { + float alpha2 = alpha * alpha; + float d = 1.0 + (alpha2 - 1.0) * cos_theta_m * cos_theta_m; + return alpha2 / (M_PI * d * d); +} + +float G_GGX_anisotropic_2cos(float cos_theta_m, float alpha_x, float alpha_y, float cos_phi, float sin_phi) { + float cos2 = cos_theta_m * cos_theta_m; + float sin2 = (1.0 - cos2); + float s_x = alpha_x * cos_phi; + float s_y = alpha_y * sin_phi; + return 1.0 / max(cos_theta_m + sqrt(cos2 + (s_x * s_x + s_y * s_y) * sin2), 0.001); +} + +float D_GGX_anisotropic(float cos_theta_m, float alpha_x, float alpha_y, float cos_phi, float sin_phi) { + float cos2 = cos_theta_m * cos_theta_m; + float sin2 = (1.0 - cos2); + float r_x = cos_phi / alpha_x; + float r_y = sin_phi / alpha_y; + float d = cos2 + sin2 * (r_x * r_x + r_y * r_y); + return 1.0 / max(M_PI * alpha_x * alpha_y * d * d, 0.001); +} + +float SchlickFresnel(float u) { + float m = 1.0 - u; + float m2 = m * m; + return m2 * m2 * m; // pow(m,5) +} + +float GTR1(float NdotH, float a) { + if (a >= 1.0) + return 1.0 / M_PI; + float a2 = a * a; + float t = 1.0 + (a2 - 1.0) * NdotH * NdotH; + return (a2 - 1.0) / (M_PI * log(a2) * t); +} + +vec3 F0(float metallic, float specular, vec3 albedo) { + float dielectric = 0.16 * specular * specular; + // use albedo * metallic as colored specular reflectance at 0 angle for metallic materials; + // see https://google.github.io/filament/Filament.md.html + return mix(vec3(dielectric), albedo, vec3(metallic)); +} + +void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount, +#ifdef LIGHT_BACKLIGHT_USED + vec3 backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + vec4 transmittance_color, + float transmittance_depth, + float transmittance_curve, + float transmittance_boost, + float transmittance_z, +#endif +#ifdef LIGHT_RIM_USED + float rim, float rim_tint, vec3 rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + float clearcoat, float clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + vec3 B, vec3 T, float anisotropy, +#endif +#ifdef USE_SOFT_SHADOWS + float A, +#endif +#ifdef USE_SHADOW_TO_OPACITY + inout float alpha, +#endif + inout vec3 diffuse_light, inout vec3 specular_light) { + +#if defined(USE_LIGHT_SHADER_CODE) + // light is written by the light shader + + vec3 normal = N; + vec3 light = L; + vec3 view = V; + + /* clang-format off */ + +LIGHT_SHADER_CODE + + /* clang-format on */ + +#else + +#ifdef USE_SOFT_SHADOWS + float NdotL = min(A + dot(N, L), 1.0); +#else + float NdotL = dot(N, L); +#endif + float cNdotL = max(NdotL, 0.0); // clamped NdotL + float NdotV = dot(N, V); + float cNdotV = max(NdotV, 0.0); + +#if defined(DIFFUSE_BURLEY) || defined(SPECULAR_BLINN) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) + vec3 H = normalize(V + L); +#endif + +#if defined(SPECULAR_BLINN) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#ifdef USE_SOFT_SHADOWS + float cNdotH = clamp(A + dot(N, H), 0.0, 1.0); +#else + float cNdotH = clamp(dot(N, H), 0.0, 1.0); +#endif +#endif + +#if defined(DIFFUSE_BURLEY) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#ifdef USE_SOFT_SHADOWS + float cLdotH = clamp(A + dot(L, H), 0.0, 1.0); +#else + float cLdotH = clamp(dot(L, H), 0.0, 1.0); +#endif +#endif + + float metallic = unpackUnorm4x8(orms).z; + if (metallic < 1.0) { + float roughness = unpackUnorm4x8(orms).y; + +#if defined(DIFFUSE_OREN_NAYAR) + vec3 diffuse_brdf_NL; +#else + float diffuse_brdf_NL; // BRDF times N.L for calculating diffuse radiance +#endif + +#if defined(DIFFUSE_LAMBERT_WRAP) + // energy conserving lambert wrap shader + diffuse_brdf_NL = max(0.0, (NdotL + roughness) / ((1.0 + roughness) * (1.0 + roughness))); +#elif defined(DIFFUSE_TOON) + + diffuse_brdf_NL = smoothstep(-roughness, max(roughness, 0.01), NdotL); + +#elif defined(DIFFUSE_BURLEY) + + { + float FD90_minus_1 = 2.0 * cLdotH * cLdotH * roughness - 0.5; + float FdV = 1.0 + FD90_minus_1 * SchlickFresnel(cNdotV); + float FdL = 1.0 + FD90_minus_1 * SchlickFresnel(cNdotL); + diffuse_brdf_NL = (1.0 / M_PI) * FdV * FdL * cNdotL; + /* + float energyBias = mix(roughness, 0.0, 0.5); + float energyFactor = mix(roughness, 1.0, 1.0 / 1.51); + float fd90 = energyBias + 2.0 * VoH * VoH * roughness; + float f0 = 1.0; + float lightScatter = f0 + (fd90 - f0) * pow(1.0 - cNdotL, 5.0); + float viewScatter = f0 + (fd90 - f0) * pow(1.0 - cNdotV, 5.0); + + diffuse_brdf_NL = lightScatter * viewScatter * energyFactor; + */ + } +#else + // lambert + diffuse_brdf_NL = cNdotL * (1.0 / M_PI); +#endif + + diffuse_light += light_color * diffuse_brdf_NL * attenuation; + +#if defined(LIGHT_BACKLIGHT_USED) + diffuse_light += light_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation; +#endif + +#if defined(LIGHT_RIM_USED) + float rim_light = pow(max(0.0, 1.0 - cNdotV), max(0.0, (1.0 - roughness) * 16.0)); + diffuse_light += rim_light * rim * mix(vec3(1.0), rim_color, rim_tint) * light_color; +#endif + +#ifdef LIGHT_TRANSMITTANCE_USED + +#ifdef SSS_MODE_SKIN + + { + float scale = 8.25 / transmittance_depth; + float d = scale * abs(transmittance_z); + float dd = -d * d; + vec3 profile = vec3(0.233, 0.455, 0.649) * exp(dd / 0.0064) + + vec3(0.1, 0.336, 0.344) * exp(dd / 0.0484) + + vec3(0.118, 0.198, 0.0) * exp(dd / 0.187) + + vec3(0.113, 0.007, 0.007) * exp(dd / 0.567) + + vec3(0.358, 0.004, 0.0) * exp(dd / 1.99) + + vec3(0.078, 0.0, 0.0) * exp(dd / 7.41); + + diffuse_light += profile * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI); + } +#else + + if (transmittance_depth > 0.0) { + float fade = clamp(abs(transmittance_z / transmittance_depth), 0.0, 1.0); + + fade = pow(max(0.0, 1.0 - fade), transmittance_curve); + fade *= clamp(transmittance_boost - NdotL, 0.0, 1.0); + + diffuse_light += transmittance_color.rgb * light_color * (1.0 / M_PI) * transmittance_color.a * fade; + } + +#endif //SSS_MODE_SKIN + +#endif //LIGHT_TRANSMITTANCE_USED + } + + float roughness = unpackUnorm4x8(orms).y; + if (roughness > 0.0) { // FIXME: roughness == 0 should not disable specular light entirely + + // D + +#if defined(SPECULAR_BLINN) + + //normalized blinn + float shininess = exp2(15.0 * (1.0 - roughness) + 1.0) * 0.25; + float blinn = pow(cNdotH, shininess) * cNdotL; + blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); + float intensity = blinn; + + specular_light += light_color * intensity * attenuation * specular_amount; + +#elif defined(SPECULAR_PHONG) + + vec3 R = normalize(-reflect(L, N)); + float cRdotV = clamp(A + dot(R, V), 0.0, 1.0); + float shininess = exp2(15.0 * (1.0 - roughness) + 1.0) * 0.25; + float phong = pow(cRdotV, shininess); + phong *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); + float intensity = (phong) / max(4.0 * cNdotV * cNdotL, 0.75); + + specular_light += light_color * intensity * attenuation * specular_amount; + +#elif defined(SPECULAR_TOON) + + vec3 R = normalize(-reflect(L, N)); + float RdotV = dot(R, V); + float mid = 1.0 - roughness; + mid *= mid; + float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid; + diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection + +#elif defined(SPECULAR_DISABLED) + // none.. + +#elif defined(SPECULAR_SCHLICK_GGX) + // shlick+ggx as default + +#if defined(LIGHT_ANISOTROPY_USED) + + float alpha_ggx = roughness * roughness; + float aspect = sqrt(1.0 - anisotropy * 0.9); + float ax = alpha_ggx / aspect; + float ay = alpha_ggx * aspect; + float XdotH = dot(T, H); + float YdotH = dot(B, H); + float D = D_GGX_anisotropic(cNdotH, ax, ay, XdotH, YdotH); + float G = G_GGX_anisotropic_2cos(cNdotL, ax, ay, XdotH, YdotH) * G_GGX_anisotropic_2cos(cNdotV, ax, ay, XdotH, YdotH); + +#else + float alpha_ggx = roughness * roughness; + float D = D_GGX(cNdotH, alpha_ggx); + float G = G_GGX_2cos(cNdotL, alpha_ggx) * G_GGX_2cos(cNdotV, alpha_ggx); +#endif + // F + float cLdotH5 = SchlickFresnel(cLdotH); + vec3 F = mix(vec3(cLdotH5), vec3(1.0), f0); + + vec3 specular_brdf_NL = cNdotL * D * F * G; + + specular_light += specular_brdf_NL * light_color * attenuation * specular_amount; +#endif + +#if defined(LIGHT_CLEARCOAT_USED) + +#if !defined(SPECULAR_SCHLICK_GGX) + float cLdotH5 = SchlickFresnel(cLdotH); +#endif + float Dr = GTR1(cNdotH, mix(.1, .001, clearcoat_gloss)); + float Fr = mix(.04, 1.0, cLdotH5); + float Gr = G_GGX_2cos(cNdotL, .25) * G_GGX_2cos(cNdotV, .25); + + float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL; + + specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount; +#endif + } + +#ifdef USE_SHADOW_TO_OPACITY + alpha = min(alpha, clamp(1.0 - attenuation), 0.0, 1.0)); +#endif + +#endif //defined(USE_LIGHT_SHADER_CODE) +} + +#ifndef USE_NO_SHADOWS + +// Interleaved Gradient Noise +// http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare +float quick_hash(vec2 pos) { + const vec3 magic = vec3(0.06711056f, 0.00583715f, 52.9829189f); + return fract(magic.z * fract(dot(pos, magic.xy))); +} + +float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec4 coord) { + vec2 pos = coord.xy; + float depth = coord.z; + + //if only one sample is taken, take it from the center + if (scene_data.directional_soft_shadow_samples == 1) { + return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); + } + + mat2 disk_rotation; + { + float r = quick_hash(gl_FragCoord.xy) * 2.0 * M_PI; + float sr = sin(r); + float cr = cos(r); + disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); + } + + float avg = 0.0; + + for (uint i = 0; i < scene_data.directional_soft_shadow_samples; i++) { + avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data.directional_soft_shadow_kernel[i].xy), depth, 1.0)); + } + + return avg * (1.0 / float(scene_data.directional_soft_shadow_samples)); +} + +float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec4 coord) { + vec2 pos = coord.xy; + float depth = coord.z; + + //if only one sample is taken, take it from the center + if (scene_data.soft_shadow_samples == 1) { + return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); + } + + mat2 disk_rotation; + { + float r = quick_hash(gl_FragCoord.xy) * 2.0 * M_PI; + float sr = sin(r); + float cr = cos(r); + disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); + } + + float avg = 0.0; + + for (uint i = 0; i < scene_data.soft_shadow_samples; i++) { + avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data.soft_shadow_kernel[i].xy), depth, 1.0)); + } + + return avg * (1.0 / float(scene_data.soft_shadow_samples)); +} + +float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex_scale) { + //find blocker + float blocker_count = 0.0; + float blocker_average = 0.0; + + mat2 disk_rotation; + { + float r = quick_hash(gl_FragCoord.xy) * 2.0 * M_PI; + float sr = sin(r); + float cr = cos(r); + disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); + } + + for (uint i = 0; i < scene_data.directional_penumbra_shadow_samples; i++) { + vec2 suv = pssm_coord.xy + (disk_rotation * scene_data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; + float d = textureLod(sampler2D(shadow, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; + if (d < pssm_coord.z) { + blocker_average += d; + blocker_count += 1.0; + } + } + + if (blocker_count > 0.0) { + //blockers found, do soft shadow + blocker_average /= blocker_count; + float penumbra = (pssm_coord.z - blocker_average) / blocker_average; + tex_scale *= penumbra; + + float s = 0.0; + for (uint i = 0; i < scene_data.directional_penumbra_shadow_samples; i++) { + vec2 suv = pssm_coord.xy + (disk_rotation * scene_data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; + s += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(suv, pssm_coord.z, 1.0)); + } + + return s / float(scene_data.directional_penumbra_shadow_samples); + + } else { + //no blockers found, so no shadow + return 1.0; + } +} + +#endif //USE_NO_SHADOWS + +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { +#ifndef USE_NO_SHADOWS + if (omni_lights.data[idx].shadow_enabled) { + // there is a shadowmap + + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + + vec4 v = vec4(vertex, 1.0); + + vec4 splane = (omni_lights.data[idx].shadow_matrix * v); + float shadow_len = length(splane.xyz); //need to remember shadow len from here + + { + vec3 nofs = normal_interp * omni_lights.data[idx].shadow_normal_bias / omni_lights.data[idx].inv_radius; + nofs *= (1.0 - max(0.0, dot(normalize(light_rel_vec), normalize(normal_interp)))); + v.xyz += nofs; + splane = (omni_lights.data[idx].shadow_matrix * v); + } + + float shadow; + +#ifdef USE_SOFT_SHADOWS + if (omni_lights.data[idx].soft_shadow_size > 0.0) { + //soft shadow + + //find blocker + + float blocker_count = 0.0; + float blocker_average = 0.0; + + mat2 disk_rotation; + { + float r = quick_hash(gl_FragCoord.xy) * 2.0 * M_PI; + float sr = sin(r); + float cr = cos(r); + disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); + } + + vec3 normal = normalize(splane.xyz); + vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); + vec3 tangent = normalize(cross(v0, normal)); + vec3 bitangent = normalize(cross(tangent, normal)); + float z_norm = shadow_len * omni_lights.data[idx].inv_radius; + + tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; + bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; + + for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { + vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; + + vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; + + pos = normalize(pos); + vec4 uv_rect = omni_lights.data[idx].atlas_rect; + + if (pos.z >= 0.0) { + pos.z += 1.0; + uv_rect.y += uv_rect.w; + } else { + pos.z = 1.0 - pos.z; + } + + pos.xy /= pos.z; + + pos.xy = pos.xy * 0.5 + 0.5; + pos.xy = uv_rect.xy + pos.xy * uv_rect.zw; + + float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), pos.xy, 0.0).r; + if (d < z_norm) { + blocker_average += d; + blocker_count += 1.0; + } + } + + if (blocker_count > 0.0) { + //blockers found, do soft shadow + blocker_average /= blocker_count; + float penumbra = (z_norm - blocker_average) / blocker_average; + tangent *= penumbra; + bitangent *= penumbra; + + z_norm -= omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias; + + shadow = 0.0; + for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { + vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; + vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; + + pos = normalize(pos); + vec4 uv_rect = omni_lights.data[idx].atlas_rect; + + if (pos.z >= 0.0) { + pos.z += 1.0; + uv_rect.y += uv_rect.w; + } else { + pos.z = 1.0 - pos.z; + } + + pos.xy /= pos.z; + + pos.xy = pos.xy * 0.5 + 0.5; + pos.xy = uv_rect.xy + pos.xy * uv_rect.zw; + shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(pos.xy, z_norm, 1.0)); + } + + shadow /= float(scene_data.penumbra_shadow_samples); + + } else { + //no blockers found, so no shadow + shadow = 1.0; + } + } else { +#endif + splane.xyz = normalize(splane.xyz); + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; + + if (splane.z >= 0.0) { + splane.z += 1.0; + + clamp_rect.y += clamp_rect.w; + + } else { + splane.z = 1.0 - splane.z; + } + + splane.xy /= splane.z; + + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = (shadow_len - omni_lights.data[idx].shadow_bias) * omni_lights.data[idx].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already + shadow = sample_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane); +#ifdef USE_SOFT_SHADOWS + } +#endif + + return shadow; + } +#endif + + return 1.0; +} + +void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +#ifdef LIGHT_BACKLIGHT_USED + vec3 backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + vec4 transmittance_color, + float transmittance_depth, + float transmittance_curve, + float transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + float rim, float rim_tint, vec3 rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + float clearcoat, float clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + vec3 binormal, vec3 tangent, float anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + inout float alpha, +#endif + inout vec3 diffuse_light, inout vec3 specular_light) { + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); + float light_attenuation = omni_attenuation; + vec3 color = omni_lights.data[idx].color; + +#ifdef USE_SOFT_SHADOWS + float size_A = 0.0; + + if (omni_lights.data[idx].size > 0.0) { + float t = omni_lights.data[idx].size / max(0.001, light_length); + size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + } +#endif + +#ifdef LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; //no transmittance by default + transmittance_color.a *= light_attenuation; + { + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; + + //redo shadowmapping, but shrink the model a bit to avoid arctifacts + vec4 splane = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * omni_lights.data[idx].transmittance_bias, 1.0)); + + shadow_len = length(splane.xyz); + splane = normalize(splane.xyz); + + if (splane.z >= 0.0) { + splane.z += 1.0; + + } else { + splane.z = 1.0 - splane.z; + } + + splane.xy /= splane.z; + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * omni_lights.data[idx].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already + + float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; + transmittance_z = (splane.z - shadow_z) / omni_lights.data[idx].inv_radius; + } +#endif + +#if 0 + + if (omni_lights.data[idx].projector_rect != vec4(0.0)) { + vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; + local_v = normalize(local_v); + + vec4 atlas_rect = omni_lights.data[idx].projector_rect; + + if (local_v.z >= 0.0) { + local_v.z += 1.0; + atlas_rect.y += atlas_rect.w; + + } else { + local_v.z = 1.0 - local_v.z; + } + + local_v.xy /= local_v.z; + local_v.xy = local_v.xy * 0.5 + 0.5; + vec2 proj_uv = local_v.xy * atlas_rect.zw; + + vec2 proj_uv_ddx; + vec2 proj_uv_ddy; + { + vec3 local_v_ddx = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz; + local_v_ddx = normalize(local_v_ddx); + + if (local_v_ddx.z >= 0.0) { + local_v_ddx.z += 1.0; + } else { + local_v_ddx.z = 1.0 - local_v_ddx.z; + } + + local_v_ddx.xy /= local_v_ddx.z; + local_v_ddx.xy = local_v_ddx.xy * 0.5 + 0.5; + + proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv; + + vec3 local_v_ddy = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz; + local_v_ddy = normalize(local_v_ddy); + + if (local_v_ddy.z >= 0.0) { + local_v_ddy.z += 1.0; + } else { + local_v_ddy.z = 1.0 - local_v_ddy.z; + } + + local_v_ddy.xy /= local_v_ddy.z; + local_v_ddy.xy = local_v_ddy.xy * 0.5 + 0.5; + + proj_uv_ddy = local_v_ddy.xy * atlas_rect.zw - proj_uv; + } + + vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + atlas_rect.xy, proj_uv_ddx, proj_uv_ddy); + no_shadow = mix(no_shadow, proj.rgb, proj.a); + } +#endif + + light_attenuation *= shadow; + + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount, +#ifdef LIGHT_BACKLIGHT_USED + backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, + transmittance_z, +#endif +#ifdef LIGHT_RIM_USED + rim * omni_attenuation, rim_tint, rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + clearcoat, clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + binormal, tangent, anisotropy, +#endif +#ifdef USE_SOFT_SHADOWS + size_A, +#endif +#ifdef USE_SHADOW_TO_OPACITY + alpha, +#endif + diffuse_light, + specular_light); +} + +float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { +#ifndef USE_NO_SHADOWS + if (spot_lights.data[idx].shadow_enabled) { + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + vec3 spot_dir = spot_lights.data[idx].direction; + //there is a shadowmap + vec4 v = vec4(vertex, 1.0); + + v.xyz -= spot_dir * spot_lights.data[idx].shadow_bias; + + float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius; + + float depth_bias_scale = 1.0 / (max(0.0001, z_norm)); //the closer to the light origin, the more you have to offset to reach 1px in the map + vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * spot_lights.data[idx].shadow_normal_bias * depth_bias_scale; + normal_bias -= spot_dir * dot(spot_dir, normal_bias); //only XY, no Z + v.xyz += normal_bias; + + //adjust with bias + z_norm = dot(spot_dir, v.xyz - spot_lights.data[idx].position) * spot_lights.data[idx].inv_radius; + + float shadow; + + vec4 splane = (spot_lights.data[idx].shadow_matrix * v); + splane /= splane.w; + +#ifdef USE_SOFT_SHADOWS + if (spot_lights.data[idx].soft_shadow_size > 0.0) { + //soft shadow + + //find blocker + + vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; + + float blocker_count = 0.0; + float blocker_average = 0.0; + + mat2 disk_rotation; + { + float r = quick_hash(gl_FragCoord.xy) * 2.0 * M_PI; + float sr = sin(r); + float cr = cos(r); + disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); + } + + float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale; + vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw; + for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { + vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); + float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; + if (d < z_norm) { + blocker_average += d; + blocker_count += 1.0; + } + } + + if (blocker_count > 0.0) { + //blockers found, do soft shadow + blocker_average /= blocker_count; + float penumbra = (z_norm - blocker_average) / blocker_average; + uv_size *= penumbra; + + shadow = 0.0; + for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { + vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); + shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, z_norm, 1.0)); + } + + shadow /= float(scene_data.penumbra_shadow_samples); + + } else { + //no blockers found, so no shadow + shadow = 1.0; + } + + } else { +#endif + //hard shadow + vec4 shadow_uv = vec4(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, splane.z, 1.0); + + shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); +#ifdef USE_SOFT_SHADOWS + } +#endif + + return shadow; + } + +#endif //USE_NO_SHADOWS + + return 1.0; +} + +void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +#ifdef LIGHT_BACKLIGHT_USED + vec3 backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + vec4 transmittance_color, + float transmittance_depth, + float transmittance_curve, + float transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + float rim, float rim_tint, vec3 rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + float clearcoat, float clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + vec3 binormal, vec3 tangent, float anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + inout float alpha, +#endif + inout vec3 diffuse_light, + inout vec3 specular_light) { + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); + vec3 spot_dir = spot_lights.data[idx].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[idx].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[idx].cone_angle)); + spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation); + float light_attenuation = spot_attenuation; + vec3 color = spot_lights.data[idx].color; + float specular_amount = spot_lights.data[idx].specular_amount; + +#ifdef USE_SOFT_SHADOWS + float size_A = 0.0; + + if (spot_lights.data[idx].size > 0.0) { + float t = spot_lights.data[idx].size / max(0.001, light_length); + size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + } +#endif + + /* + if (spot_lights.data[idx].atlas_rect!=vec4(0.0)) { + //use projector texture + } + */ + +#ifdef LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; + transmittance_color.a *= light_attenuation; + { + splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * spot_lights.data[idx].transmittance_bias, 1.0)); + splane /= splane.w; + splane.xy = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; + + float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; + //reconstruct depth + shadow_z /= spot_lights.data[idx].inv_radius; + //distance to light plane + float z = dot(spot_dir, -light_rel_vec); + transmittance_z = z - shadow_z; + } +#endif //LIGHT_TRANSMITTANCE_USED + + light_attenuation *= shadow; + + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount, +#ifdef LIGHT_BACKLIGHT_USED + backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, + transmittance_z, +#endif +#ifdef LIGHT_RIM_USED + rim * spot_attenuation, rim_tint, rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + clearcoat, clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + binormal, tangent, anisotropy, +#endif +#ifdef USE_SOFT_SHADOW + size_A, +#endif +#ifdef USE_SHADOW_TO_OPACITY + alpha, +#endif + diffuse_light, specular_light); +} + +void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughness, vec3 ambient_light, vec3 specular_light, inout vec4 ambient_accum, inout vec4 reflection_accum) { + vec3 box_extents = reflections.data[ref_index].box_extents; + vec3 local_pos = (reflections.data[ref_index].local_matrix * vec4(vertex, 1.0)).xyz; + + if (any(greaterThan(abs(local_pos), box_extents))) { //out of the reflection box + return; + } + + vec3 ref_vec = normalize(reflect(vertex, normal)); + + vec3 inner_pos = abs(local_pos / box_extents); + float blend = max(inner_pos.x, max(inner_pos.y, inner_pos.z)); + //make blend more rounded + blend = mix(length(inner_pos), blend, blend); + blend *= blend; + blend = max(0.0, 1.0 - blend); + + if (reflections.data[ref_index].intensity > 0.0) { // compute reflection + + vec3 local_ref_vec = (reflections.data[ref_index].local_matrix * vec4(ref_vec, 0.0)).xyz; + + if (reflections.data[ref_index].box_project) { //box project + + vec3 nrdir = normalize(local_ref_vec); + vec3 rbmax = (box_extents - local_pos) / nrdir; + vec3 rbmin = (-box_extents - local_pos) / nrdir; + + vec3 rbminmax = mix(rbmin, rbmax, greaterThan(nrdir, vec3(0.0, 0.0, 0.0))); + + float fa = min(min(rbminmax.x, rbminmax.y), rbminmax.z); + vec3 posonbox = local_pos + nrdir * fa; + local_ref_vec = posonbox - reflections.data[ref_index].box_offset; + } + + vec4 reflection; + + reflection.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_ref_vec, reflections.data[ref_index].index), roughness * MAX_ROUGHNESS_LOD).rgb; + + if (reflections.data[ref_index].exterior) { + reflection.rgb = mix(specular_light, reflection.rgb, blend); + } + + reflection.rgb *= reflections.data[ref_index].intensity; //intensity + reflection.a = blend; + reflection.rgb *= reflection.a; + + reflection_accum += reflection; + } + + switch (reflections.data[ref_index].ambient_mode) { + case REFLECTION_AMBIENT_DISABLED: { + //do nothing + } break; + case REFLECTION_AMBIENT_ENVIRONMENT: { + //do nothing + vec3 local_amb_vec = (reflections.data[ref_index].local_matrix * vec4(normal, 0.0)).xyz; + + vec4 ambient_out; + + ambient_out.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb; + ambient_out.a = blend; + if (reflections.data[ref_index].exterior) { + ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); + } + + ambient_out.rgb *= ambient_out.a; + ambient_accum += ambient_out; + } break; + case REFLECTION_AMBIENT_COLOR: { + vec4 ambient_out; + ambient_out.a = blend; + ambient_out.rgb = reflections.data[ref_index].ambient; + if (reflections.data[ref_index].exterior) { + ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); + } + ambient_out.rgb *= ambient_out.a; + ambient_accum += ambient_out; + } break; + } +} + +#ifdef USE_FORWARD_GI + +//standard voxel cone trace +vec4 voxel_cone_trace(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { + float dist = p_bias; + vec4 color = vec4(0.0); + + while (dist < max_distance && color.a < 0.95) { + float diameter = max(1.0, 2.0 * tan_half_angle * dist); + vec3 uvw_pos = (pos + dist * direction) * cell_size; + float half_diameter = diameter * 0.5; + //check if outside, then break + if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + half_diameter * cell_size)))) { + break; + } + vec4 scolor = textureLod(sampler3D(probe, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, log2(diameter)); + float a = (1.0 - color.a); + color += a * scolor; + dist += half_diameter; + } + + return color; +} + +vec4 voxel_cone_trace_45_degrees(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { + float dist = p_bias; + vec4 color = vec4(0.0); + float radius = max(0.5, tan_half_angle * dist); + float lod_level = log2(radius * 2.0); + + while (dist < max_distance && color.a < 0.95) { + vec3 uvw_pos = (pos + dist * direction) * cell_size; + + //check if outside, then break + if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + radius * cell_size)))) { + break; + } + vec4 scolor = textureLod(sampler3D(probe, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, lod_level); + lod_level += 1.0; + + float a = (1.0 - color.a); + scolor *= a; + color += scolor; + dist += radius; + radius = max(0.5, tan_half_angle * dist); + } + + return color; +} + +void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3 normal_xform, float roughness, vec3 ambient, vec3 environment, inout vec4 out_spec, inout vec4 out_diff) { + position = (gi_probes.data[index].xform * vec4(position, 1.0)).xyz; + ref_vec = normalize((gi_probes.data[index].xform * vec4(ref_vec, 0.0)).xyz); + normal = normalize((gi_probes.data[index].xform * vec4(normal, 0.0)).xyz); + + position += normal * gi_probes.data[index].normal_bias; + + //this causes corrupted pixels, i have no idea why.. + if (any(bvec2(any(lessThan(position, vec3(0.0))), any(greaterThan(position, gi_probes.data[index].bounds))))) { + return; + } + + vec3 blendv = abs(position / gi_probes.data[index].bounds * 2.0 - 1.0); + float blend = clamp(1.0 - max(blendv.x, max(blendv.y, blendv.z)), 0.0, 1.0); + //float blend=1.0; + + float max_distance = length(gi_probes.data[index].bounds); + vec3 cell_size = 1.0 / gi_probes.data[index].bounds; + + //radiance + +#define MAX_CONE_DIRS 4 + + vec3 cone_dirs[MAX_CONE_DIRS] = vec3[]( + vec3(0.707107, 0.0, 0.707107), + vec3(0.0, 0.707107, 0.707107), + vec3(-0.707107, 0.0, 0.707107), + vec3(0.0, -0.707107, 0.707107)); + + float cone_weights[MAX_CONE_DIRS] = float[](0.25, 0.25, 0.25, 0.25); + float cone_angle_tan = 0.98269; + + vec3 light = vec3(0.0); + + for (int i = 0; i < MAX_CONE_DIRS; i++) { + vec3 dir = normalize((gi_probes.data[index].xform * vec4(normal_xform * cone_dirs[i], 0.0)).xyz); + + vec4 cone_light = voxel_cone_trace_45_degrees(gi_probe_textures[index], cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); + + if (gi_probes.data[index].blend_ambient) { + cone_light.rgb = mix(ambient, cone_light.rgb, min(1.0, cone_light.a / 0.95)); + } + + light += cone_weights[i] * cone_light.rgb; + } + + light *= gi_probes.data[index].dynamic_range; + out_diff += vec4(light * blend, blend); + + //irradiance + vec4 irr_light = voxel_cone_trace(gi_probe_textures[index], cell_size, position, ref_vec, tan(roughness * 0.5 * M_PI * 0.99), max_distance, gi_probes.data[index].bias); + if (gi_probes.data[index].blend_ambient) { + irr_light.rgb = mix(environment, irr_light.rgb, min(1.0, irr_light.a / 0.95)); + } + irr_light.rgb *= gi_probes.data[index].dynamic_range; + //irr_light=vec3(0.0); + + out_spec += vec4(irr_light.rgb * blend, blend); +} + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +void sdfgi_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal, vec3 cam_specular_normal, bool use_specular, float roughness, out vec3 diffuse_light, out vec3 specular_light, out float blend) { + cascade_pos += cam_normal * sdfgi.normal_bias; + + vec3 base_pos = floor(cascade_pos); + //cascade_pos += mix(vec3(0.0),vec3(0.01),lessThan(abs(cascade_pos-base_pos),vec3(0.01))) * cam_normal; + ivec3 probe_base_pos = ivec3(base_pos); + + vec4 diffuse_accum = vec4(0.0); + vec3 specular_accum; + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(cascade)); + tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; + tex_pos.xy = tex_pos.xy * (SDFGI_OCT_SIZE + 2) + ivec2(1); + + vec3 diffuse_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + + vec3 specular_posf; + + if (use_specular) { + specular_accum = vec3(0.0); + specular_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_specular_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + } + + vec4 light_accum = vec4(0.0); + float weight_accum = 0.0; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = cascade_pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(cam_normal, probe_dir)); + + // Compute lightprobe occlusion + + if (sdfgi.use_occlusion) { + ivec3 occ_indexv = abs((sdfgi.cascades[cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; + occ_pos.z += float(cascade); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + + occ_pos *= sdfgi.occlusion_renormalize; + float occlusion = dot(textureLod(sampler3D(sdfgi_occlusion_cascades, material_samplers[SAMPLER_LINEAR_CLAMP]), occ_pos, 0.0), occ_mask); + + weight *= max(occlusion, 0.01); + } + + // Compute lightprobe texture position + + vec3 diffuse; + vec3 pos_uvw = diffuse_posf; + pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; + pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; + diffuse = textureLod(sampler2DArray(sdfgi_lightprobe_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), pos_uvw, 0.0).rgb; + + diffuse_accum += vec4(diffuse * weight, weight); + + if (use_specular) { + vec3 specular = vec3(0.0); + vec3 pos_uvw = specular_posf; + pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; + pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; + if (roughness < 0.99) { + specular = textureLod(sampler2DArray(sdfgi_lightprobe_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), pos_uvw + vec3(0, 0, float(sdfgi.max_cascades)), 0.0).rgb; + } + if (roughness > 0.5) { + specular = mix(specular, textureLod(sampler2DArray(sdfgi_lightprobe_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), pos_uvw, 0.0).rgb, (roughness - 0.5) * 2.0); + } + + specular_accum += specular * weight; + } + } + + if (diffuse_accum.a > 0.0) { + diffuse_accum.rgb /= diffuse_accum.a; + } + + diffuse_light = diffuse_accum.rgb; + + if (use_specular) { + if (diffuse_accum.a > 0.0) { + specular_accum /= diffuse_accum.a; + } + + specular_light = specular_accum; + } + + { + //process blend + float blend_from = (float(sdfgi.probe_axis_size - 1) / 2.0) - 2.5; + float blend_to = blend_from + 2.0; + + vec3 inner_pos = cam_pos * sdfgi.cascades[cascade].to_probe; + + float len = length(inner_pos); + + inner_pos = abs(normalize(inner_pos)); + len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z)); + + if (len >= blend_from) { + blend = smoothstep(blend_from, blend_to, len); + } else { + blend = 0.0; + } + } +} + +#endif //USE_FORWARD_GI + +#endif //!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + +#ifndef MODE_RENDER_DEPTH + +#ifndef LOW_END_MODE + +vec4 volumetric_fog_process(vec2 screen_uv, float z) { + vec3 fog_pos = vec3(screen_uv, z * scene_data.volumetric_fog_inv_length); + if (fog_pos.z < 0.0) { + return vec4(0.0); + } else if (fog_pos.z < 1.0) { + fog_pos.z = pow(fog_pos.z, scene_data.volumetric_fog_detail_spread); + } + + return texture(sampler3D(volumetric_fog_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), fog_pos); +} +#endif + +vec4 fog_process(vec3 vertex) { + vec3 fog_color = scene_data.fog_light_color; + + if (scene_data.fog_aerial_perspective > 0.0) { + vec3 sky_fog_color = vec3(0.0); + vec3 cube_view = scene_data.radiance_inverse_xform * vertex; + // mip_level always reads from the second mipmap and higher so the fog is always slightly blurred + float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)); +#ifdef USE_RADIANCE_CUBEMAP_ARRAY + float lod, blend; + blend = modf(mip_level * MAX_ROUGHNESS_LOD, lod); + sky_fog_color = texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(cube_view, lod)).rgb; + sky_fog_color = mix(sky_fog_color, texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(cube_view, lod + 1)).rgb, blend); +#else + sky_fog_color = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), cube_view, mip_level * MAX_ROUGHNESS_LOD).rgb; +#endif //USE_RADIANCE_CUBEMAP_ARRAY + fog_color = mix(fog_color, sky_fog_color, scene_data.fog_aerial_perspective); + } + + if (scene_data.fog_sun_scatter > 0.001) { + vec4 sun_scatter = vec4(0.0); + float sun_total = 0.0; + vec3 view = normalize(vertex); + + for (uint i = 0; i < scene_data.directional_light_count; i++) { + vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy; + float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0); + fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + } + } + + float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); + + if (abs(scene_data.fog_height_density) > 0.001) { + float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; + + float y_dist = scene_data.fog_height - y; + + float vfog_amount = clamp(exp(y_dist * scene_data.fog_height_density), 0.0, 1.0); + + fog_amount = max(vfog_amount, fog_amount); + } + + return vec4(fog_color, fog_amount); +} + +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + +float blur_shadow(float shadow) { + return shadow; +#if 0 + //disabling for now, will investigate later + float interp_shadow = shadow; + if (gl_HelperInvocation) { + interp_shadow = -4.0; // technically anything below -4 will do but just to make sure + } + + uvec2 fc2 = uvec2(gl_FragCoord.xy); + interp_shadow -= dFdx(interp_shadow) * (float(fc2.x & 1) - 0.5); + interp_shadow -= dFdy(interp_shadow) * (float(fc2.y & 1) - 0.5); + + if (interp_shadow >= 0.0) { + shadow = interp_shadow; + } + return shadow; +#endif +} + +#endif //!MODE_RENDER DEPTH + +void main() { +#ifdef MODE_DUAL_PARABOLOID + + if (dp_clip > 0.0) + discard; +#endif + + //lay out everything, whathever is unused is optimized away anyway + vec3 vertex = vertex_interp; + vec3 view = -normalize(vertex_interp); + vec3 albedo = vec3(1.0); + vec3 backlight = vec3(0.0); + vec4 transmittance_color = vec4(0.0); + float transmittance_depth = 0.0; + float transmittance_curve = 1.0; + float transmittance_boost = 0.0; + float metallic = 0.0; + float specular = 0.5; + vec3 emission = vec3(0.0); + float roughness = 1.0; + float rim = 0.0; + float rim_tint = 0.0; + float clearcoat = 0.0; + float clearcoat_gloss = 0.0; + float anisotropy = 0.0; + vec2 anisotropy_flow = vec2(1.0, 0.0); + vec4 fog = vec4(0.0); +#if defined(CUSTOM_RADIANCE_USED) + vec4 custom_radiance = vec4(0.0); +#endif +#if defined(CUSTOM_IRRADIANCE_USED) + vec4 custom_irradiance = vec4(0.0); +#endif + + float ao = 1.0; + float ao_light_affect = 0.0; + + float alpha = 1.0; + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + vec3 binormal = normalize(binormal_interp); + vec3 tangent = normalize(tangent_interp); +#else + vec3 binormal = vec3(0.0); + vec3 tangent = vec3(0.0); +#endif + +#ifdef NORMAL_USED + vec3 normal = normalize(normal_interp); + +#if defined(DO_SIDE_CHECK) + if (!gl_FrontFacing) { + normal = -normal; + } +#endif + +#endif //NORMAL_USED + +#ifdef UV_USED + vec2 uv = uv_interp; +#endif + +#if defined(UV2_USED) || defined(USE_LIGHTMAP) + vec2 uv2 = uv2_interp; +#endif + +#if defined(COLOR_USED) + vec4 color = color_interp; +#endif + +#if defined(NORMAL_MAP_USED) + + vec3 normal_map = vec3(0.5); +#endif + + float normal_map_depth = 1.0; + + vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size + scene_data.screen_pixel_size * 0.5; //account for center + + float sss_strength = 0.0; + +#ifdef ALPHA_SCISSOR_USED + float alpha_scissor_threshold = 1.0; +#endif // ALPHA_SCISSOR_USED + +#ifdef ALPHA_HASH_USED + float alpha_hash_scale = 1.0; +#endif // ALPHA_HASH_USED + +#ifdef ALPHA_ANTIALIASING_EDGE_USED + float alpha_antialiasing_edge = 0.0; + vec2 alpha_texture_coordinate = vec2(0.0, 0.0); +#endif // ALPHA_ANTIALIASING_EDGE_USED + + { + /* clang-format off */ + +FRAGMENT_SHADER_CODE + + /* clang-format on */ + } + +#ifdef LIGHT_TRANSMITTANCE_USED +#ifdef SSS_MODE_SKIN + transmittance_color.a = sss_strength; +#else + transmittance_color.a *= sss_strength; +#endif +#endif + +#ifndef USE_SHADOW_TO_OPACITY + +#ifdef ALPHA_SCISSOR_USED + if (alpha < alpha_scissor_threshold) { + discard; + } +#endif // ALPHA_SCISSOR_USED + +// alpha hash can be used in unison with alpha antialiasing +#ifdef ALPHA_HASH_USED + if (alpha < compute_alpha_hash_threshold(vertex, alpha_hash_scale)) { + discard; + } +#endif // ALPHA_HASH_USED + +// If we are not edge antialiasing, we need to remove the output alpha channel from scissor and hash +#if (defined(ALPHA_SCISSOR_USED) || defined(ALPHA_HASH_USED)) && !defined(ALPHA_ANTIALIASING_EDGE_USED) + alpha = 1.0; +#endif + +#ifdef ALPHA_ANTIALIASING_EDGE_USED +// If alpha scissor is used, we must further the edge threshold, otherwise we wont get any edge feather +#ifdef ALPHA_SCISSOR_USED + alpha_antialiasing_edge = clamp(alpha_scissor_threshold + alpha_antialiasing_edge, 0.0, 1.0); +#endif + alpha = compute_alpha_antialiasing_edge(alpha, alpha_texture_coordinate, alpha_antialiasing_edge); +#endif // ALPHA_ANTIALIASING_EDGE_USED + +#ifdef USE_OPAQUE_PREPASS + if (alpha < opaque_prepass_threshold) { + discard; + } +#endif // USE_OPAQUE_PREPASS + +#endif // !USE_SHADOW_TO_OPACITY + +#ifdef NORMAL_MAP_USED + + normal_map.xy = normal_map.xy * 2.0 - 1.0; + normal_map.z = sqrt(max(0.0, 1.0 - dot(normal_map.xy, normal_map.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. + + normal = normalize(mix(normal, tangent * normal_map.x + binormal * normal_map.y + normal * normal_map.z, normal_map_depth)); + +#endif + +#ifdef LIGHT_ANISOTROPY_USED + + if (anisotropy > 0.01) { + //rotation matrix + mat3 rot = mat3(tangent, binormal, normal); + //make local to space + tangent = normalize(rot * vec3(anisotropy_flow.x, anisotropy_flow.y, 0.0)); + binormal = normalize(rot * vec3(-anisotropy_flow.y, anisotropy_flow.x, 0.0)); + } + +#endif + +#ifdef ENABLE_CLIP_ALPHA + if (albedo.a < 0.99) { + //used for doublepass and shadowmapping + discard; + } +#endif + + /////////////////////// FOG ////////////////////// +#ifndef MODE_RENDER_DEPTH + +#ifndef CUSTOM_FOG_USED + // fog must be processed as early as possible and then packed. + // to maximize VGPR usage + // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. + + if (scene_data.fog_enabled) { + fog = fog_process(vertex); + } + +#ifndef LOW_END_MODE + if (scene_data.volumetric_fog_enabled) { + vec4 volumetric_fog = volumetric_fog_process(screen_uv, -vertex.z); + if (scene_data.fog_enabled) { + //must use the full blending equation here to blend fogs + vec4 res; + float sa = 1.0 - volumetric_fog.a; + res.a = fog.a * sa + volumetric_fog.a; + if (res.a == 0.0) { + res.rgb = vec3(0.0); + } else { + res.rgb = (fog.rgb * fog.a * sa + volumetric_fog.rgb * volumetric_fog.a) / res.a; + } + fog = res; + } else { + fog = volumetric_fog; + } + } +#endif //!LOW_END_MODE +#endif //!CUSTOM_FOG_USED + + uint fog_rg = packHalf2x16(fog.rg); + uint fog_ba = packHalf2x16(fog.ba); + +#endif //!MODE_RENDER_DEPTH + + /////////////////////// DECALS //////////////////////////////// + +#ifndef MODE_RENDER_DEPTH + + uvec2 cluster_pos = uvec2(gl_FragCoord.xy) >> scene_data.cluster_shift; + uint cluster_offset = (scene_data.cluster_width * cluster_pos.y + cluster_pos.x) * (scene_data.max_cluster_element_count_div_32 + 32); + + uint cluster_z = uint(clamp((-vertex.z / scene_data.z_far) * 32.0, 0.0, 31.0)); + + //used for interpolating anything cluster related + vec3 vertex_ddx = dFdx(vertex); + vec3 vertex_ddy = dFdy(vertex); + + { // process decals + + uint cluster_decal_offset = cluster_offset + scene_data.cluster_type_size * 2; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_decal_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_decal_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint decal_index = 32 * i + bit; + + if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; + if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { + continue; //out of decal + } + + //we need ddx/ddy for mipmaps, so simulate them + vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz; + vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz; + + float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); + + if (decals.data[decal_index].normal_fade > 0.0) { + fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5); + } + + if (decals.data[decal_index].albedo_rect != vec4(0.0)) { + //has albedo + vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); + decal_albedo *= decals.data[decal_index].modulate; + decal_albedo.a *= fade; + albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); + + if (decals.data[decal_index].normal_rect != vec4(0.0)) { + vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; + decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software + decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy))); + //convert to view space, use xzy because y is up + decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; + + normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + } + + if (decals.data[decal_index].orm_rect != vec4(0.0)) { + vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; + ao = mix(ao, decal_orm.r, decal_albedo.a); + roughness = mix(roughness, decal_orm.g, decal_albedo.a); + metallic = mix(metallic, decal_orm.b, decal_albedo.a); + } + } + + if (decals.data[decal_index].emission_rect != vec4(0.0)) { + //emission is additive, so its independent from albedo + emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + } + } + } + } + + //pack albedo until needed again, saves 2 VGPRs in the meantime + +#endif //not render depth + /////////////////////// LIGHTING ////////////////////////////// + +#ifdef NORMAL_USED + if (scene_data.roughness_limiter_enabled) { + //http://www.jp.square-enix.com/tech/library/pdf/ImprovedGeometricSpecularAA.pdf + float roughness2 = roughness * roughness; + vec3 dndu = dFdx(normal), dndv = dFdx(normal); + float variance = scene_data.roughness_limiter_amount * (dot(dndu, dndu) + dot(dndv, dndv)); + float kernelRoughness2 = min(2.0 * variance, scene_data.roughness_limiter_limit); //limit effect + float filteredRoughness2 = min(1.0, roughness2 + kernelRoughness2); + roughness = sqrt(filteredRoughness2); + } +#endif + //apply energy conservation + + vec3 specular_light = vec3(0.0, 0.0, 0.0); + vec3 diffuse_light = vec3(0.0, 0.0, 0.0); + vec3 ambient_light = vec3(0.0, 0.0, 0.0); + +#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + + if (scene_data.use_reflection_cubemap) { + vec3 ref_vec = reflect(-view, normal); + ref_vec = scene_data.radiance_inverse_xform * ref_vec; +#ifdef USE_RADIANCE_CUBEMAP_ARRAY + + float lod, blend; + blend = modf(roughness * MAX_ROUGHNESS_LOD, lod); + specular_light = texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(ref_vec, lod)).rgb; + specular_light = mix(specular_light, texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(ref_vec, lod + 1)).rgb, blend); + +#else + specular_light = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ref_vec, roughness * MAX_ROUGHNESS_LOD).rgb; + +#endif //USE_RADIANCE_CUBEMAP_ARRAY + specular_light *= scene_data.ambient_light_color_energy.a; + } + +#if defined(CUSTOM_RADIANCE_USED) + specular_light = mix(specular_light, custom_radiance.rgb, custom_radiance.a); +#endif + +#ifndef USE_LIGHTMAP + //lightmap overrides everything + if (scene_data.use_ambient_light) { + ambient_light = scene_data.ambient_light_color_energy.rgb; + + if (scene_data.use_ambient_cubemap) { + vec3 ambient_dir = scene_data.radiance_inverse_xform * normal; +#ifdef USE_RADIANCE_CUBEMAP_ARRAY + vec3 cubemap_ambient = texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(ambient_dir, MAX_ROUGHNESS_LOD)).rgb; +#else + vec3 cubemap_ambient = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ambient_dir, MAX_ROUGHNESS_LOD).rgb; +#endif //USE_RADIANCE_CUBEMAP_ARRAY + + ambient_light = mix(ambient_light, cubemap_ambient * scene_data.ambient_light_color_energy.a, scene_data.ambient_color_sky_mix); + } + } +#endif // USE_LIGHTMAP +#if defined(CUSTOM_IRRADIANCE_USED) + ambient_light = mix(specular_light, custom_irradiance.rgb, custom_irradiance.a); +#endif +#endif //!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + + //radiance + +/// GI /// +#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + +#ifdef USE_LIGHTMAP + + //lightmap + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture + uint index = instances.data[instance_index].gi_offset; + + vec3 wnormal = mat3(scene_data.camera_matrix) * normal; + const float c1 = 0.429043; + const float c2 = 0.511664; + const float c3 = 0.743125; + const float c4 = 0.886227; + const float c5 = 0.247708; + ambient_light += (c1 * lightmap_captures.data[index].sh[8].rgb * (wnormal.x * wnormal.x - wnormal.y * wnormal.y) + + c3 * lightmap_captures.data[index].sh[6].rgb * wnormal.z * wnormal.z + + c4 * lightmap_captures.data[index].sh[0].rgb - + c5 * lightmap_captures.data[index].sh[6].rgb + + 2.0 * c1 * lightmap_captures.data[index].sh[4].rgb * wnormal.x * wnormal.y + + 2.0 * c1 * lightmap_captures.data[index].sh[7].rgb * wnormal.x * wnormal.z + + 2.0 * c1 * lightmap_captures.data[index].sh[5].rgb * wnormal.y * wnormal.z + + 2.0 * c2 * lightmap_captures.data[index].sh[3].rgb * wnormal.x + + 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); + + } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap + bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); + uint ofs = instances.data[instance_index].gi_offset & 0xFFFF; + vec3 uvw; + uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy; + uvw.z = float((instances.data[instance_index].gi_offset >> 16) & 0xFFFF); + + if (uses_sh) { + uvw.z *= 4.0; //SH textures use 4 times more data + vec3 lm_light_l0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 0.0), 0.0).rgb; + vec3 lm_light_l1n1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 1.0), 0.0).rgb; + vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; + vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; + + uint idx = instances.data[instance_index].gi_offset >> 20; + vec3 n = normalize(lightmaps.data[idx].normal_xform * normal); + + ambient_light += lm_light_l0 * 0.282095f; + ambient_light += lm_light_l1n1 * 0.32573 * n.y; + ambient_light += lm_light_l1_0 * 0.32573 * n.z; + ambient_light += lm_light_l1p1 * 0.32573 * n.x; + if (metallic > 0.01) { // since the more direct bounced light is lost, we can kind of fake it with this trick + vec3 r = reflect(normalize(-vertex), normal); + specular_light += lm_light_l1n1 * 0.32573 * r.y; + specular_light += lm_light_l1_0 * 0.32573 * r.z; + specular_light += lm_light_l1p1 * 0.32573 * r.x; + } + + } else { + ambient_light += textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw, 0.0).rgb; + } + } +#elif defined(USE_FORWARD_GI) + + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + + //make vertex orientation the world one, but still align to camera + vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; + vec3 cam_normal = mat3(scene_data.camera_matrix) * normal; + vec3 cam_reflection = mat3(scene_data.camera_matrix) * reflect(-view, normal); + + //apply y-mult + cam_pos.y *= sdfgi.y_mult; + cam_normal.y *= sdfgi.y_mult; + cam_normal = normalize(cam_normal); + cam_reflection.y *= sdfgi.y_mult; + cam_normal = normalize(cam_normal); + cam_reflection = normalize(cam_reflection); + + vec4 light_accum = vec4(0.0); + float weight_accum = 0.0; + + vec4 light_blend_accum = vec4(0.0); + float weight_blend_accum = 0.0; + + float blend = -1.0; + + // helper constants, compute once + + uint cascade = 0xFFFFFFFF; + vec3 cascade_pos; + vec3 cascade_normal; + + for (uint i = 0; i < sdfgi.max_cascades; i++) { + cascade_pos = (cam_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + + if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { + continue; //skip cascade + } + + cascade = i; + break; + } + + if (cascade < SDFGI_MAX_CASCADES) { + bool use_specular = true; + float blend; + vec3 diffuse, specular; + sdfgi_process(cascade, cascade_pos, cam_pos, cam_normal, cam_reflection, use_specular, roughness, diffuse, specular, blend); + + if (blend > 0.0) { + //blend + if (cascade == sdfgi.max_cascades - 1) { + diffuse = mix(diffuse, ambient_light, blend); + if (use_specular) { + specular = mix(specular, specular_light, blend); + } + } else { + vec3 diffuse2, specular2; + float blend2; + cascade_pos = (cam_pos - sdfgi.cascades[cascade + 1].position) * sdfgi.cascades[cascade + 1].to_probe; + sdfgi_process(cascade + 1, cascade_pos, cam_pos, cam_normal, cam_reflection, use_specular, roughness, diffuse2, specular2, blend2); + diffuse = mix(diffuse, diffuse2, blend); + if (use_specular) { + specular = mix(specular, specular2, blend); + } + } + } + + ambient_light = diffuse; + if (use_specular) { + specular_light = specular; + } + } + } + + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); + //find arbitrary tangent and bitangent, then build a matrix + vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); + vec3 tangent = normalize(cross(v0, normal)); + vec3 bitangent = normalize(cross(tangent, normal)); + mat3 normal_mat = mat3(tangent, bitangent, normal); + + vec4 amb_accum = vec4(0.0); + vec4 spec_accum = vec4(0.0); + gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); + + uint index2 = instances.data[instance_index].gi_offset >> 16; + + if (index2 != 0xFFFF) { + gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); + } + + if (amb_accum.a > 0.0) { + amb_accum.rgb /= amb_accum.a; + } + + if (spec_accum.a > 0.0) { + spec_accum.rgb /= spec_accum.a; + } + + specular_light = spec_accum.rgb; + ambient_light = amb_accum.rgb; + } +#elif !defined(LOW_END_MODE) + + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + + vec2 coord; + + if (scene_data.gi_upscale_for_msaa) { + vec2 base_coord = screen_uv; + vec2 closest_coord = base_coord; + float closest_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0.0).xyz * 2.0 - 1.0); + + for (int i = 0; i < 4; i++) { + const vec2 neighbours[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1)); + vec2 neighbour_coord = base_coord + neighbours[i] * scene_data.screen_pixel_size; + float neighbour_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0.0).xyz * 2.0 - 1.0); + if (neighbour_ang > closest_ang) { + closest_ang = neighbour_ang; + closest_coord = neighbour_coord; + } + } + + coord = closest_coord; + + } else { + coord = screen_uv; + } + + vec4 buffer_ambient = textureLod(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); + vec4 buffer_reflection = textureLod(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); + + ambient_light = mix(ambient_light, buffer_ambient.rgb, buffer_ambient.a); + specular_light = mix(specular_light, buffer_reflection.rgb, buffer_reflection.a); + } +#endif + +#ifndef LOW_END_MODE + if (scene_data.ssao_enabled) { + float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; + ao = min(ao, ssao); + ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); + } +#endif //LOW_END_MODE + + { // process reflections + + vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); + + uint cluster_reflection_offset = cluster_offset + scene_data.cluster_type_size * 3; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_reflection_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_reflection_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint reflection_index = 32 * i + bit; + + if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + } + } + + if (reflection_accum.a > 0.0) { + specular_light = reflection_accum.rgb / reflection_accum.a; + } + +#if !defined(USE_LIGHTMAP) + if (ambient_accum.a > 0.0) { + ambient_light = ambient_accum.rgb / ambient_accum.a; + } +#endif + } + + //finalize ambient light here + ambient_light *= albedo.rgb; + ambient_light *= ao; + + // convert ao to direct light ao + ao = mix(1.0, ao, ao_light_affect); + + //this saves some VGPRs + vec3 f0 = F0(metallic, specular, albedo); + + { +#if defined(DIFFUSE_TOON) + //simplify for toon, as + specular_light *= specular * metallic * albedo * 2.0; +#else + + // scales the specular reflections, needs to be be computed before lighting happens, + // but after environment, GI, and reflection probes are added + // Environment brdf approximation (Lazarov 2013) + // see https://www.unrealengine.com/en-US/blog/physically-based-shading-on-mobile + const vec4 c0 = vec4(-1.0, -0.0275, -0.572, 0.022); + const vec4 c1 = vec4(1.0, 0.0425, 1.04, -0.04); + vec4 r = roughness * c0 + c1; + float ndotv = clamp(dot(normal, view), 0.0, 1.0); + float a004 = min(r.x * r.x, exp2(-9.28 * ndotv)) * r.x + r.y; + vec2 env = vec2(-1.04, 1.04) * a004 + r.zw; + + specular_light *= env.x * f0 + env.y; +#endif + } + +#endif //GI !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + +#if !defined(MODE_RENDER_DEPTH) + //this saves some VGPRs + uint orms = packUnorm4x8(vec4(ao, roughness, metallic, specular)); +#endif + +// LIGHTING +#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + + { //directional light + + // Do shadow and lighting in two passes to reduce register pressure + uint shadow0 = 0; + uint shadow1 = 0; + + for (uint i = 0; i < 8; i++) { + if (i >= scene_data.directional_light_count) { + break; + } + + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + float shadow = 1.0; + +#ifdef USE_SOFT_SHADOWS + //version with soft shadows, more expensive + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -vertex.z; + + vec4 pssm_coord; + vec3 shadow_color = vec3(0.0); + vec3 light_dir = directional_lights.data[i].direction; + +#define BIAS_FUNC(m_var, m_idx) \ + m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ + vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))) * directional_lights.data[i].shadow_normal_bias[m_idx]; \ + normal_bias -= light_dir * dot(light_dir, normal_bias); \ + m_var.xyz += normal_bias; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 0) + + pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); + pssm_coord /= pssm_coord.w; + + if (directional_lights.data[i].softshadow_angle > 0) { + float range_pos = dot(directional_lights.data[i].direction, v.xyz); + float range_begin = directional_lights.data[i].shadow_range_begin.x; + float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; + vec2 tex_scale = directional_lights.data[i].uv_scale1 * test_radius; + shadow = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); + } else { + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + } + + shadow_color = directional_lights.data[i].shadow_color1.rgb; + + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 1) + + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_coord /= pssm_coord.w; + + if (directional_lights.data[i].softshadow_angle > 0) { + float range_pos = dot(directional_lights.data[i].direction, v.xyz); + float range_begin = directional_lights.data[i].shadow_range_begin.y; + float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; + vec2 tex_scale = directional_lights.data[i].uv_scale2 * test_radius; + shadow = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); + } else { + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + } + + shadow_color = directional_lights.data[i].shadow_color2.rgb; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 2) + + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_coord /= pssm_coord.w; + + if (directional_lights.data[i].softshadow_angle > 0) { + float range_pos = dot(directional_lights.data[i].direction, v.xyz); + float range_begin = directional_lights.data[i].shadow_range_begin.z; + float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; + vec2 tex_scale = directional_lights.data[i].uv_scale3 * test_radius; + shadow = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); + } else { + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + } + + shadow_color = directional_lights.data[i].shadow_color3.rgb; + + } else { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 3) + + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_coord /= pssm_coord.w; + + if (directional_lights.data[i].softshadow_angle > 0) { + float range_pos = dot(directional_lights.data[i].direction, v.xyz); + float range_begin = directional_lights.data[i].shadow_range_begin.w; + float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; + vec2 tex_scale = directional_lights.data[i].uv_scale4 * test_radius; + shadow = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); + } else { + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + } + + shadow_color = directional_lights.data[i].shadow_color4.rgb; + } + + if (directional_lights.data[i].blend_splits) { + vec3 shadow_color_blend = vec3(0.0); + float pssm_blend; + float shadow2; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 1) + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_coord /= pssm_coord.w; + + if (directional_lights.data[i].softshadow_angle > 0) { + float range_pos = dot(directional_lights.data[i].direction, v.xyz); + float range_begin = directional_lights.data[i].shadow_range_begin.y; + float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; + vec2 tex_scale = directional_lights.data[i].uv_scale2 * test_radius; + shadow2 = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); + } else { + shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + } + + pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z); + shadow_color_blend = directional_lights.data[i].shadow_color2.rgb; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 2) + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_coord /= pssm_coord.w; + + if (directional_lights.data[i].softshadow_angle > 0) { + float range_pos = dot(directional_lights.data[i].direction, v.xyz); + float range_begin = directional_lights.data[i].shadow_range_begin.z; + float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; + vec2 tex_scale = directional_lights.data[i].uv_scale3 * test_radius; + shadow2 = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); + } else { + shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + } + + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z); + + shadow_color_blend = directional_lights.data[i].shadow_color3.rgb; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 3) + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_coord /= pssm_coord.w; + if (directional_lights.data[i].softshadow_angle > 0) { + float range_pos = dot(directional_lights.data[i].direction, v.xyz); + float range_begin = directional_lights.data[i].shadow_range_begin.w; + float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; + vec2 tex_scale = directional_lights.data[i].uv_scale4 * test_radius; + shadow2 = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); + } else { + shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + } + + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z); + shadow_color_blend = directional_lights.data[i].shadow_color4.rgb; + } else { + pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) + } + + pssm_blend = sqrt(pssm_blend); + + shadow = mix(shadow, shadow2, pssm_blend); + shadow_color = mix(shadow_color, shadow_color_blend, pssm_blend); + } + + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance + +#undef BIAS_FUNC + } +#else + // Soft shadow disabled version + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -vertex.z; + + vec4 pssm_coord; + vec3 light_dir = directional_lights.data[i].direction; + vec3 base_normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))); + +#define BIAS_FUNC(m_var, m_idx) \ + m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ + vec3 normal_bias = base_normal_bias * directional_lights.data[i].shadow_normal_bias[m_idx]; \ + normal_bias -= light_dir * dot(light_dir, normal_bias); \ + m_var.xyz += normal_bias; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 0) + + pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x; + + transmittance_z = z - shadow_z; + } +#endif + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 1) + + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y; + + transmittance_z = z - shadow_z; + } +#endif + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 2) + + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z; + + transmittance_z = z - shadow_z; + } +#endif + + } else { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 3) + + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w; + + transmittance_z = z - shadow_z; + } +#endif + } + + pssm_coord /= pssm_coord.w; + + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + + if (directional_lights.data[i].blend_splits) { + float pssm_blend; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 1) + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z); + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 2) + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z); + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 3) + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z); + } else { + pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) + } + + pssm_coord /= pssm_coord.w; + + float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + shadow = mix(shadow, shadow2, pssm_blend); + } + + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance + +#undef BIAS_FUNC + } +#endif + + if (i < 4) { + shadow0 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << (i * 8); + } else { + shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8); + } + } + + for (uint i = 0; i < 8; i++) { + if (i >= scene_data.directional_light_count) { + break; + } + + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + +#ifdef LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -vertex.z; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x; + + transmittance_z = z - shadow_z; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y; + + transmittance_z = z - shadow_z; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z; + + transmittance_z = z - shadow_z; + + } else { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w; + + transmittance_z = z - shadow_z; + } +#endif + + float shadow = 1.0; + + if (i < 4) { + shadow = float(shadow0 >> (i * 8) & 0xFF) / 255.0; + } else { + shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0; + } + + blur_shadow(shadow); + + light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, +#ifdef LIGHT_BACKLIGHT_USED + backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, + transmittance_z, +#endif +#ifdef LIGHT_RIM_USED + rim, rim_tint, albedo, +#endif +#ifdef LIGHT_CLEARCOAT_USED + clearcoat, clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + binormal, tangent, anisotropy, +#endif +#ifdef USE_SOFT_SHADOW + directional_lights.data[i].size, +#endif +#ifdef USE_SHADOW_TO_OPACITY + alpha, +#endif + diffuse_light, + specular_light); + } + } + + { //omni lights + + uint cluster_omni_offset = cluster_offset; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_omni_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; + + if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + float shadow = light_process_omni_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); + + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, +#ifdef LIGHT_BACKLIGHT_USED + backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + rim, + rim_tint, + albedo, +#endif +#ifdef LIGHT_CLEARCOAT_USED + clearcoat, clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + tangent, binormal, anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + alpha, +#endif + diffuse_light, specular_light); + } + } + } + + { //spot lights + + uint cluster_spot_offset = cluster_offset + scene_data.cluster_type_size; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_spot_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + + uint light_index = 32 * i + bit; + + if (!bool(spot_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { + continue; //not masked + } + + float shadow = light_process_spot_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); + + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, +#ifdef LIGHT_BACKLIGHT_USED + backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + rim, + rim_tint, + albedo, +#endif +#ifdef LIGHT_CLEARCOAT_USED + clearcoat, clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + tangent, binormal, anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + alpha, +#endif + diffuse_light, specular_light); + } + } + } + +#ifdef USE_SHADOW_TO_OPACITY + alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); + +#if defined(ALPHA_SCISSOR_USED) + if (alpha < alpha_scissor) { + discard; + } +#endif // ALPHA_SCISSOR_USED + +#ifdef USE_OPAQUE_PREPASS + + if (alpha < opaque_prepass_threshold) { + discard; + } + +#endif // USE_OPAQUE_PREPASS + +#endif // USE_SHADOW_TO_OPACITY + +#endif //!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + +#ifdef MODE_RENDER_DEPTH + +#ifdef MODE_RENDER_SDF + + { + vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz; + ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size)); + + uint albedo16 = 0x1; //solid flag + albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11; + albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6; + albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1; + + imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16)); + + uint facing_bits = 0; + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp); + + float closest_dist = -1e20; + + for (uint i = 0; i < 6; i++) { + float d = dot(cam_normal, aniso_dir[i]); + if (d > closest_dist) { + closest_dist = d; + facing_bits = (1 << i); + } + } + + imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits + + if (length(emission) > 0.001) { + float lumas[6]; + vec3 light_total = vec3(0); + + for (int i = 0; i < 6; i++) { + float strength = max(0.0, dot(cam_normal, aniso_dir[i])); + vec3 light = emission * strength; + light_total += light; + lumas[i] = max(light.r, max(light.g, light.b)); + } + + float luma_total = max(light_total.r, max(light_total.g, light_total.b)); + + uint light_aniso = 0; + + for (int i = 0; i < 6; i++) { + light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); + } + + //compress to RGBE9995 to save space + + const float pow2to9 = 512.0f; + const float B = 15.0f; + const float N = 9.0f; + const float LN2 = 0.6931471805599453094172321215; + + float cRed = clamp(light_total.r, 0.0, 65408.0); + float cGreen = clamp(light_total.g, 0.0, 65408.0); + float cBlue = clamp(light_total.b, 0.0, 65408.0); + + float cMax = max(cRed, max(cGreen, cBlue)); + + float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + + float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + + float exps = expp + 1.0f; + + if (0.0 <= sMax && sMax < pow2to9) { + exps = expp; + } + + float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); + float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); + float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); + //store as 8985 to have 2 extra neighbour bits + uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); + + imageStore(emission_grid, grid_pos, uvec4(light_rgbe)); + imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso)); + } + } + +#endif + +#ifdef MODE_RENDER_MATERIAL + + albedo_output_buffer.rgb = albedo; + albedo_output_buffer.a = alpha; + + normal_output_buffer.rgb = normal * 0.5 + 0.5; + normal_output_buffer.a = 0.0; + depth_output_buffer.r = -vertex.z; + + orm_output_buffer.r = ao; + orm_output_buffer.g = roughness; + orm_output_buffer.b = metallic; + orm_output_buffer.a = sss_strength; + + emission_output_buffer.rgb = emission; + emission_output_buffer.a = 0.0; +#endif + +#ifdef MODE_RENDER_NORMAL_ROUGHNESS + normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); + +#ifdef MODE_RENDER_GIPROBE + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + uint index2 = instances.data[instance_index].gi_offset >> 16; + giprobe_buffer.x = index1 & 0xFF; + giprobe_buffer.y = index2 & 0xFF; + } else { + giprobe_buffer.x = 0xFF; + giprobe_buffer.y = 0xFF; + } +#endif + +#endif //MODE_RENDER_NORMAL_ROUGHNESS + +//nothing happens, so a tree-ssa optimizer will result in no fragment shader :) +#else + + // multiply by albedo + diffuse_light *= albedo; // ambient must be multiplied by albedo at the end + + // apply direct light AO + ao = unpackUnorm4x8(orms).x; + specular_light *= ao; + diffuse_light *= ao; + + // apply metallic + metallic = unpackUnorm4x8(orms).z; + diffuse_light *= 1.0 - metallic; + ambient_light *= 1.0 - metallic; + + //restore fog + fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba)); + +#ifdef MODE_MULTIPLE_RENDER_TARGETS + +#ifdef MODE_UNSHADED + diffuse_buffer = vec4(albedo.rgb, 0.0); + specular_buffer = vec4(0.0); + +#else + +#ifdef SSS_MODE_SKIN + sss_strength = -sss_strength; +#endif + diffuse_buffer = vec4(emission + diffuse_light + ambient_light, sss_strength); + specular_buffer = vec4(specular_light, metallic); +#endif + + diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); + specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); + +#else //MODE_MULTIPLE_RENDER_TARGETS + +#ifdef MODE_UNSHADED + frag_color = vec4(albedo, alpha); +#else + frag_color = vec4(emission + ambient_light + diffuse_light + specular_light, alpha); + //frag_color = vec4(1.0); +#endif //USE_NO_SHADING + + // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + ; + +#endif //MODE_MULTIPLE_RENDER_TARGETS + +#endif //MODE_RENDER_DEPTH + } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl new file mode 100644 index 0000000000..d78890fa9e --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -0,0 +1,352 @@ +#define M_PI 3.14159265359 +#define ROUGHNESS_MAX_LOD 5 + +#define MAX_GI_PROBES 8 + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +#define USE_SUBGROUPS + +#endif + +#include "cluster_data_inc.glsl" + +#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) +#ifndef NORMAL_USED +#define NORMAL_USED +#endif +#endif + +layout(push_constant, binding = 0, std430) uniform DrawCall { + uint instance_index; + uint uv_offset; + uint pad0; + uint pad1; +} +draw_call; + +/* Set 0 Scene data that never changes, ever */ + +#define SAMPLER_NEAREST_CLAMP 0 +#define SAMPLER_LINEAR_CLAMP 1 +#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2 +#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5 +#define SAMPLER_NEAREST_REPEAT 6 +#define SAMPLER_LINEAR_REPEAT 7 +#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8 +#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 + +#define SDFGI_MAX_CASCADES 8 + +/* Set 1: Base Pass (never changes) */ + +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; + +layout(set = 0, binding = 2) uniform sampler shadow_sampler; + +#define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) +#define INSTANCE_FLAGS_USE_SDFGI (1 << 7) +#define INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE (1 << 8) +#define INSTANCE_FLAGS_USE_LIGHTMAP (1 << 9) +#define INSTANCE_FLAGS_USE_SH_LIGHTMAP (1 << 10) +#define INSTANCE_FLAGS_USE_GIPROBE (1 << 11) +#define INSTANCE_FLAGS_MULTIMESH (1 << 12) +#define INSTANCE_FLAGS_MULTIMESH_FORMAT_2D (1 << 13) +#define INSTANCE_FLAGS_MULTIMESH_HAS_COLOR (1 << 14) +#define INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA (1 << 15) +#define INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT 16 +//3 bits of stride +#define INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK 0x7 + +#define INSTANCE_FLAGS_SKELETON (1 << 19) +#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) + +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { + LightData data[]; +} +omni_lights; + +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { + LightData data[]; +} +spot_lights; + +layout(set = 0, binding = 5) buffer restrict readonly ReflectionProbeData { + ReflectionData data[]; +} +reflections; + +layout(set = 0, binding = 6, std140) uniform DirectionalLights { + DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; +} +directional_lights; + +#define LIGHTMAP_FLAG_USE_DIRECTION 1 +#define LIGHTMAP_FLAG_USE_SPECULAR_DIRECTION 2 + +struct Lightmap { + mat3 normal_xform; +}; + +layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps { + Lightmap data[]; +} +lightmaps; + +struct LightmapCapture { + vec4 sh[9]; +}; + +layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { + LightmapCapture data[]; +} +lightmap_captures; + +layout(set = 0, binding = 9) uniform texture2D decal_atlas; +layout(set = 0, binding = 10) uniform texture2D decal_atlas_srgb; + +layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { + DecalData data[]; +} +decals; + +layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableData { + vec4 data[]; +} +global_variables; + +#ifndef LOW_END_MODE + +struct SDFGIProbeCascadeData { + vec3 position; + float to_probe; + ivec3 probe_world_offset; + float to_cell; // 1/bounds * grid_size +}; + +layout(set = 0, binding = 13, std140) uniform SDFGI { + vec3 grid_size; + uint max_cascades; + + bool use_occlusion; + int probe_axis_size; + float probe_to_uvw; + float normal_bias; + + vec3 lightprobe_tex_pixel_size; + float energy; + + vec3 lightprobe_uv_offset; + float y_mult; + + vec3 occlusion_clamp; + uint pad3; + + vec3 occlusion_renormalize; + uint pad4; + + vec3 cascade_probe_size; + uint pad5; + + SDFGIProbeCascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; + +#endif //LOW_END_MODE + +/* Set 2: Render Pass (changes per render pass) */ + +layout(set = 1, binding = 0, std140) uniform SceneData { + mat4 projection_matrix; + mat4 inv_projection_matrix; + + mat4 camera_matrix; + mat4 inv_camera_matrix; + + vec2 viewport_size; + vec2 screen_pixel_size; + + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint max_cluster_element_count_div_32; + + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted + vec4 directional_penumbra_shadow_kernel[32]; + vec4 directional_soft_shadow_kernel[32]; + vec4 penumbra_shadow_kernel[32]; + vec4 soft_shadow_kernel[32]; + + uint directional_penumbra_shadow_samples; + uint directional_soft_shadow_samples; + uint penumbra_shadow_samples; + uint soft_shadow_samples; + + vec4 ambient_light_color_energy; + + float ambient_color_sky_mix; + bool use_ambient_light; + bool use_ambient_cubemap; + bool use_reflection_cubemap; + + mat3 radiance_inverse_xform; + + vec2 shadow_atlas_pixel_size; + vec2 directional_shadow_pixel_size; + + uint directional_light_count; + float dual_paraboloid_side; + float z_far; + float z_near; + + bool ssao_enabled; + float ssao_light_affect; + float ssao_ao_affect; + bool roughness_limiter_enabled; + + float roughness_limiter_amount; + float roughness_limiter_limit; + uvec2 roughness_limiter_pad; + + vec4 ao_color; + + mat4 sdf_to_bounds; + + ivec3 sdf_offset; + bool material_uv2_mode; + + ivec3 sdf_size; + bool gi_upscale_for_msaa; + + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; + + bool fog_enabled; + float fog_density; + float fog_height; + float fog_height_density; + + vec3 fog_light_color; + float fog_sun_scatter; + + float fog_aerial_perspective; + + float time; + float reflection_multiplier; // one normally, zero when rendering reflections + + bool pancake_shadows; +} + +scene_data; + +struct InstanceData { + mat4 transform; + uint flags; + uint instance_uniforms_ofs; //base offset in global buffer for instance variables + uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint layer_mask; + vec4 lightmap_uv_scale; +}; + +layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer { + InstanceData data[]; +} +instances; + +#ifdef USE_RADIANCE_CUBEMAP_ARRAY + +layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap; + +#else + +layout(set = 1, binding = 2) uniform textureCube radiance_cubemap; + +#endif + +layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas; + +layout(set = 1, binding = 4) uniform texture2D shadow_atlas; + +layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas; + +layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; + +#ifndef LOW_END_MOD +layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +#endif + +layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer { + uint data[]; +} +cluster_buffer; + +#ifdef MODE_RENDER_SDF + +layout(r16ui, set = 1, binding = 9) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 10) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 11) uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; + +//still need to be present for shaders that use it, so remap them to something +#define depth_buffer shadow_atlas +#define color_buffer shadow_atlas +#define normal_roughness_buffer shadow_atlas + +#else + +layout(set = 1, binding = 9) uniform texture2D depth_buffer; +layout(set = 1, binding = 10) uniform texture2D color_buffer; + +#ifndef LOW_END_MODE + +layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 12) uniform texture2D ao_buffer; +layout(set = 1, binding = 13) uniform texture2D ambient_buffer; +layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; + +struct GIProbeData { + mat4 xform; + vec3 bounds; + float dynamic_range; + + float bias; + float normal_bias; + bool blend_ambient; + uint texture_slot; + + float anisotropy_strength; + float ambient_occlusion; + float ambient_occlusion_size; + uint mipmaps; +}; + +layout(set = 1, binding = 17, std140) uniform GIProbes { + GIProbeData data[MAX_GI_PROBES]; +} +gi_probes; + +layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture; + +#endif // LOW_END_MODE + +#endif + +/* Set 2 Skeleton & Instancing (can change per item) */ + +layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { + vec4 data[]; +} +transforms; + +/* Set 3 User Material */ diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl new file mode 100644 index 0000000000..06dc4b13de --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl @@ -0,0 +1,246 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_diffuse; +layout(r32f, set = 0, binding = 1) uniform restrict readonly image2D source_depth; +layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D ssr_image; +#ifdef MODE_ROUGH +layout(r8, set = 1, binding = 1) uniform restrict writeonly image2D blur_radius_image; +#endif +layout(rgba8, set = 2, binding = 0) uniform restrict readonly image2D source_normal_roughness; +layout(set = 3, binding = 0) uniform sampler2D source_metallic; + +layout(push_constant, binding = 2, std430) uniform Params { + vec4 proj_info; + + ivec2 screen_size; + float camera_z_near; + float camera_z_far; + + int num_steps; + float depth_tolerance; + float distance_fade; + float curve_fade_in; + + bool orthogonal; + float filter_mipmap_levels; + bool use_half_res; + uint metallic_mask; + + mat4 projection; +} +params; + +vec2 view_to_screen(vec3 view_pos, out float w) { + vec4 projected = params.projection * vec4(view_pos, 1.0); + projected.xyz /= projected.w; + projected.xy = projected.xy * 0.5 + 0.5; + w = projected.w; + return projected.xy; +} + +#define M_PI 3.14159265359 + +vec3 reconstructCSPosition(vec2 S, float z) { + if (params.orthogonal) { + return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); + } else { + return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); + } +} + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + + vec2 pixel_size = 1.0 / vec2(params.screen_size); + vec2 uv = vec2(ssC) * pixel_size; + + uv += pixel_size * 0.5; + + float base_depth = imageLoad(source_depth, ssC).r; + + // World space point being shaded + vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth); + + vec4 normal_roughness = imageLoad(source_normal_roughness, ssC); + vec3 normal = normal_roughness.xyz * 2.0 - 1.0; + normal = normalize(normal); + normal.y = -normal.y; //because this code reads flipped + + vec3 view_dir = normalize(vertex); + vec3 ray_dir = normalize(reflect(view_dir, normal)); + + if (dot(ray_dir, normal) < 0.001) { + imageStore(ssr_image, ssC, vec4(0.0)); + return; + } + //ray_dir = normalize(view_dir - normal * dot(normal,view_dir) * 2.0); + //ray_dir = normalize(vec3(1.0, 1.0, -1.0)); + + //////////////// + + // make ray length and clip it against the near plane (don't want to trace beyond visible) + float ray_len = (vertex.z + ray_dir.z * params.camera_z_far) > -params.camera_z_near ? (-params.camera_z_near - vertex.z) / ray_dir.z : params.camera_z_far; + vec3 ray_end = vertex + ray_dir * ray_len; + + float w_begin; + vec2 vp_line_begin = view_to_screen(vertex, w_begin); + float w_end; + vec2 vp_line_end = view_to_screen(ray_end, w_end); + vec2 vp_line_dir = vp_line_end - vp_line_begin; + + // we need to interpolate w along the ray, to generate perspective correct reflections + w_begin = 1.0 / w_begin; + w_end = 1.0 / w_end; + + float z_begin = vertex.z * w_begin; + float z_end = ray_end.z * w_end; + + vec2 line_begin = vp_line_begin / pixel_size; + vec2 line_dir = vp_line_dir / pixel_size; + float z_dir = z_end - z_begin; + float w_dir = w_end - w_begin; + + // clip the line to the viewport edges + + float scale_max_x = min(1.0, 0.99 * (1.0 - vp_line_begin.x) / max(1e-5, vp_line_dir.x)); + float scale_max_y = min(1.0, 0.99 * (1.0 - vp_line_begin.y) / max(1e-5, vp_line_dir.y)); + float scale_min_x = min(1.0, 0.99 * vp_line_begin.x / max(1e-5, -vp_line_dir.x)); + float scale_min_y = min(1.0, 0.99 * vp_line_begin.y / max(1e-5, -vp_line_dir.y)); + float line_clip = min(scale_max_x, scale_max_y) * min(scale_min_x, scale_min_y); + line_dir *= line_clip; + z_dir *= line_clip; + w_dir *= line_clip; + + // clip z and w advance to line advance + vec2 line_advance = normalize(line_dir); // down to pixel + float step_size = length(line_advance) / length(line_dir); + float z_advance = z_dir * step_size; // adapt z advance to line advance + float w_advance = w_dir * step_size; // adapt w advance to line advance + + // make line advance faster if direction is closer to pixel edges (this avoids sampling the same pixel twice) + float advance_angle_adj = 1.0 / max(abs(line_advance.x), abs(line_advance.y)); + line_advance *= advance_angle_adj; // adapt z advance to line advance + z_advance *= advance_angle_adj; + w_advance *= advance_angle_adj; + + vec2 pos = line_begin; + float z = z_begin; + float w = w_begin; + float z_from = z / w; + float z_to = z_from; + float depth; + vec2 prev_pos = pos; + + bool found = false; + + float steps_taken = 0.0; + + for (int i = 0; i < params.num_steps; i++) { + pos += line_advance; + z += z_advance; + w += w_advance; + + // convert to linear depth + + depth = imageLoad(source_depth, ivec2(pos - 0.5)).r; + + z_from = z_to; + z_to = z / w; + + if (depth > z_to) { + // if depth was surpassed + if (depth <= max(z_to, z_from) + params.depth_tolerance && -depth < params.camera_z_far) { + // check the depth tolerance and far clip + // check that normal is valid + found = true; + } + break; + } + + steps_taken += 1.0; + prev_pos = pos; + } + + if (found) { + float margin_blend = 1.0; + + vec2 margin = vec2((params.screen_size.x + params.screen_size.y) * 0.5 * 0.05); // make a uniform margin + if (any(bvec4(lessThan(pos, -margin), greaterThan(pos, params.screen_size + margin)))) { + // clip outside screen + margin + imageStore(ssr_image, ssC, vec4(0.0)); + return; + } + + { + //blend fading out towards external margin + vec2 margin_grad = mix(pos - params.screen_size, -pos, lessThan(pos, vec2(0.0))); + margin_blend = 1.0 - smoothstep(0.0, margin.x, max(margin_grad.x, margin_grad.y)); + //margin_blend = 1.0; + } + + vec2 final_pos; + float grad; + grad = steps_taken / float(params.num_steps); + float initial_fade = params.curve_fade_in == 0.0 ? 1.0 : pow(clamp(grad, 0.0, 1.0), params.curve_fade_in); + float fade = pow(clamp(1.0 - grad, 0.0, 1.0), params.distance_fade) * initial_fade; + final_pos = pos; + + vec4 final_color; + +#ifdef MODE_ROUGH + + // if roughness is enabled, do screen space cone tracing + float blur_radius = 0.0; + float roughness = normal_roughness.w; + + if (roughness > 0.001) { + float cone_angle = min(roughness, 0.999) * M_PI * 0.5; + float cone_len = length(final_pos - line_begin); + float op_len = 2.0 * tan(cone_angle) * cone_len; // opposite side of iso triangle + { + // fit to sphere inside cone (sphere ends at end of cone), something like this: + // ___ + // \O/ + // V + // + // as it avoids bleeding from beyond the reflection as much as possible. As a plus + // it also makes the rough reflection more elongated. + float a = op_len; + float h = cone_len; + float a2 = a * a; + float fh2 = 4.0f * h * h; + blur_radius = (a * (sqrt(a2 + fh2) - a)) / (4.0f * h); + } + } + + final_color = imageLoad(source_diffuse, ivec2((final_pos - 0.5) * pixel_size)); + + imageStore(blur_radius_image, ssC, vec4(blur_radius / 255.0)); //stored in r8 + +#endif + + final_color = vec4(imageLoad(source_diffuse, ivec2(final_pos - 0.5)).rgb, fade * margin_blend); + //change blend by metallic + vec4 metallic_mask = unpackUnorm4x8(params.metallic_mask); + final_color.a *= dot(metallic_mask, texelFetch(source_metallic, ssC << 1, 0)); + + imageStore(ssr_image, ssC, final_color); + + } else { +#ifdef MODE_ROUGH + imageStore(blur_radius_image, ssC, vec4(0.0)); +#endif + imageStore(ssr_image, ssC, vec4(0.0)); + } +} diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl new file mode 100644 index 0000000000..a5afe74cb2 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl @@ -0,0 +1,154 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_ssr; +layout(r8, set = 0, binding = 1) uniform restrict readonly image2D source_radius; +layout(rgba8, set = 1, binding = 0) uniform restrict readonly image2D source_normal; + +layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; +#ifndef VERTICAL_PASS +layout(r8, set = 2, binding = 1) uniform restrict writeonly image2D dest_radius; +#endif +layout(r32f, set = 3, binding = 0) uniform restrict readonly image2D source_depth; + +layout(push_constant, binding = 2, std430) uniform Params { + vec4 proj_info; + + bool orthogonal; + float edge_tolerance; + int increment; + uint pad; + + ivec2 screen_size; + bool vertical; + uint steps; +} +params; + +#define GAUSS_TABLE_SIZE 15 + +const float gauss_table[GAUSS_TABLE_SIZE + 1] = float[]( + 0.1847392078702266, + 0.16595854345772326, + 0.12031364177766891, + 0.07038755277896766, + 0.03322925565155569, + 0.012657819729901945, + 0.0038903040680094217, + 0.0009646503390864025, + 0.00019297087402915717, + 0.000031139936308099136, + 0.000004053309048174758, + 4.255228059965837e-7, + 3.602517634249573e-8, + 2.4592560765896795e-9, + 1.3534945386863618e-10, + 0.0 //one more for interpolation +); + +float gauss_weight(float p_val) { + float idxf; + float c = modf(max(0.0, p_val * float(GAUSS_TABLE_SIZE)), idxf); + int idx = int(idxf); + if (idx >= GAUSS_TABLE_SIZE + 1) { + return 0.0; + } + + return mix(gauss_table[idx], gauss_table[idx + 1], c); +} + +#define M_PI 3.14159265359 + +vec3 reconstructCSPosition(vec2 S, float z) { + if (params.orthogonal) { + return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); + } else { + return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); + } +} + +void do_filter(inout vec4 accum, inout float accum_radius, inout float divisor, ivec2 texcoord, ivec2 increment, vec3 p_pos, vec3 normal, float p_limit_radius) { + for (int i = 1; i < params.steps; i++) { + float d = float(i * params.increment); + ivec2 tc = texcoord + increment * i; + float depth = imageLoad(source_depth, tc).r; + vec3 view_pos = reconstructCSPosition(vec2(tc) + 0.5, depth); + vec3 view_normal = normalize(imageLoad(source_normal, tc).rgb * 2.0 - 1.0); + view_normal.y = -view_normal.y; + + float r = imageLoad(source_radius, tc).r; + float radius = round(r * 255.0); + + float angle_n = 1.0 - abs(dot(normal, view_normal)); + if (angle_n > params.edge_tolerance) { + break; + } + + float angle = abs(dot(normal, normalize(view_pos - p_pos))); + + if (angle > params.edge_tolerance) { + break; + } + + if (d < radius) { + float w = gauss_weight(d / radius); + accum += imageLoad(source_ssr, tc) * w; +#ifndef VERTICAL_PASS + accum_radius += r * w; +#endif + divisor += w; + } + } +} + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + + float base_contrib = gauss_table[0]; + + vec4 accum = imageLoad(source_ssr, ssC); + + float accum_radius = imageLoad(source_radius, ssC).r; + float radius = accum_radius * 255.0; + + float divisor = gauss_table[0]; + accum *= divisor; + accum_radius *= divisor; +#ifdef VERTICAL_PASS + ivec2 direction = ivec2(0, params.increment); +#else + ivec2 direction = ivec2(params.increment, 0); +#endif + float depth = imageLoad(source_depth, ssC).r; + vec3 pos = reconstructCSPosition(vec2(ssC) + 0.5, depth); + vec3 normal = imageLoad(source_normal, ssC).xyz * 2.0 - 1.0; + normal = normalize(normal); + normal.y = -normal.y; + + do_filter(accum, accum_radius, divisor, ssC, direction, pos, normal, radius); + do_filter(accum, accum_radius, divisor, ssC, -direction, pos, normal, radius); + + if (divisor > 0.0) { + accum /= divisor; + accum_radius /= divisor; + } else { + accum = vec4(0.0); + accum_radius = 0.0; + } + + imageStore(dest_ssr, ssC, accum); + +#ifndef VERTICAL_PASS + imageStore(dest_radius, ssC, vec4(accum_radius)); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl new file mode 100644 index 0000000000..218605a962 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl @@ -0,0 +1,87 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2D source_ssr; +layout(set = 1, binding = 0) uniform sampler2D source_depth; +layout(set = 1, binding = 1) uniform sampler2D source_normal; +layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; +layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_depth; +layout(rgba8, set = 3, binding = 1) uniform restrict writeonly image2D dest_normal; + +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 screen_size; + float camera_z_near; + float camera_z_far; + + bool orthogonal; + bool filtered; + uint pad[2]; +} +params; + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + //do not filter, SSR will generate arctifacts if this is done + + float divisor = 0.0; + vec4 color; + float depth; + vec3 normal; + + if (params.filtered) { + color = vec4(0.0); + depth = 0.0; + normal = vec3(0.0); + + for (int i = 0; i < 4; i++) { + ivec2 ofs = ssC << 1; + if (bool(i & 1)) { + ofs.x += 1; + } + if (bool(i & 2)) { + ofs.y += 1; + } + color += texelFetch(source_ssr, ofs, 0); + float d = texelFetch(source_depth, ofs, 0).r; + normal += texelFetch(source_normal, ofs, 0).xyz * 2.0 - 1.0; + + d = d * 2.0 - 1.0; + if (params.orthogonal) { + d = ((d + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + } else { + d = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - d * (params.camera_z_far - params.camera_z_near)); + } + depth += -d; + } + + color /= 4.0; + depth /= 4.0; + normal = normalize(normal / 4.0) * 0.5 + 0.5; + } else { + color = texelFetch(source_ssr, ssC << 1, 0); + depth = texelFetch(source_depth, ssC << 1, 0).r; + normal = texelFetch(source_normal, ssC << 1, 0).xyz; + + depth = depth * 2.0 - 1.0; + if (params.orthogonal) { + depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + } else { + depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + } + depth = -depth; + } + + imageStore(dest_ssr, ssC, color); + imageStore(dest_depth, ssC, vec4(depth)); + imageStore(dest_normal, ssC, vec4(normal, 0.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl new file mode 100644 index 0000000000..e4c3f3a84b --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl @@ -0,0 +1,174 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; +layout(set = 0, binding = 5) uniform texture3D occlusion_texture; + +layout(set = 0, binding = 8) uniform sampler linear_sampler; + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 9, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D screen_buffer; + +layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; + +layout(push_constant, binding = 0, std430) uniform Params { + vec3 grid_size; + uint max_cascades; + + ivec2 screen_size; + bool use_occlusion; + float y_mult; + + vec3 cam_extent; + int probe_axis_size; + + mat4 cam_transform; +} +params; + +vec3 linear_to_srgb(vec3 color) { + //if going to srgb, clamp from 0 to 1. + color = clamp(color, vec3(0.0), vec3(1.0)); + const vec3 a = vec3(0.055f); + return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f))); +} + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +void main() { + // Pixel being shaded + ivec2 screen_pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(screen_pos, params.screen_size))) { //too large, do nothing + return; + } + + vec3 ray_pos; + vec3 ray_dir; + { + ray_pos = params.cam_transform[3].xyz; + + ray_dir.xy = params.cam_extent.xy * ((vec2(screen_pos) / vec2(params.screen_size)) * 2.0 - 1.0); + ray_dir.z = params.cam_extent.z; + + ray_dir = normalize(mat3(params.cam_transform) * ray_dir); + } + + ray_pos.y *= params.y_mult; + ray_dir.y *= params.y_mult; + ray_dir = normalize(ray_dir); + + vec3 pos_to_uvw = 1.0 / params.grid_size; + + vec3 light = vec3(0.0); + float blend = 0.0; + +#if 1 + // No interpolation + + vec3 inv_dir = 1.0 / ray_dir; + + float rough = 0.5; + bool hit = false; + + for (uint i = 0; i < params.max_cascades; i++) { + //convert to local bounds + vec3 pos = ray_pos - cascades.data[i].offset; + pos *= cascades.data[i].to_cell; + + // Should never happen for debug, since we start mostly at the bounds center, + // but add anyway. + //if (any(lessThan(pos,vec3(0.0))) || any(greaterThanEqual(pos,params.grid_size))) { + // continue; //already past bounds for this cascade, goto next + //} + + //find maximum advance distance (until reaching bounds) + vec3 t0 = -pos * inv_dir; + vec3 t1 = (params.grid_size - pos) * inv_dir; + vec3 tmax = max(t0, t1); + float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + + float advance = 0.0; + vec3 uvw; + hit = false; + + while (advance < max_advance) { + //read how much to advance from SDF + uvw = (pos + ray_dir * advance) * pos_to_uvw; + + float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), uvw).r * 255.0 - 1.7; + + if (distance < 0.001) { + //consider hit + hit = true; + break; + } + + advance += distance; + } + + if (!hit) { + pos += ray_dir * min(advance, max_advance); + pos /= cascades.data[i].to_cell; + pos += cascades.data[i].offset; + ray_pos = pos; + continue; + } + + //compute albedo, emission and normal at hit point + + const float EPSILON = 0.001; + vec3 hit_normal = normalize(vec3( + texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + + vec3 hit_light = texture(sampler3D(light_cascades[i], linear_sampler), uvw).rgb; + vec4 aniso0 = texture(sampler3D(aniso0_cascades[i], linear_sampler), uvw); + vec3 hit_aniso0 = aniso0.rgb; + vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[i], linear_sampler), uvw).rg); + + hit_light *= (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); + + light = hit_light; + + break; + } + +#endif + + imageStore(screen_buffer, screen_pos, vec4(linear_to_srgb(light), 1.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl new file mode 100644 index 0000000000..08da283dad --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl @@ -0,0 +1,231 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +#define MAX_CASCADES 8 + +layout(push_constant, binding = 0, std430) uniform Params { + mat4 projection; + + uint band_power; + uint sections_in_band; + uint band_mask; + float section_arc; + + vec3 grid_size; + uint cascade; + + uint pad; + float y_mult; + uint probe_debug_index; + int probe_axis_size; +} +params; + +// http://in4k.untergrund.net/html_articles/hugi_27_-_coding_corner_polaris_sphere_tessellation_101.htm + +vec3 get_sphere_vertex(uint p_vertex_id) { + float x_angle = float(p_vertex_id & 1u) + (p_vertex_id >> params.band_power); + + float y_angle = + float((p_vertex_id & params.band_mask) >> 1) + ((p_vertex_id >> params.band_power) * params.sections_in_band); + + x_angle *= params.section_arc * 0.5f; // remember - 180AA x rot not 360 + y_angle *= -params.section_arc; + + vec3 point = vec3(sin(x_angle) * sin(y_angle), cos(x_angle), sin(x_angle) * cos(y_angle)); + + return point; +} + +#ifdef MODE_PROBES + +layout(location = 0) out vec3 normal_interp; +layout(location = 1) out flat uint probe_index; + +#endif + +#ifdef MODE_VISIBILITY + +layout(location = 0) out float visibility; + +#endif + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 1, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(set = 0, binding = 4) uniform texture3D occlusion_texture; +layout(set = 0, binding = 3) uniform sampler linear_sampler; + +void main() { +#ifdef MODE_PROBES + probe_index = gl_InstanceIndex; + + normal_interp = get_sphere_vertex(gl_VertexIndex); + + vec3 vertex = normal_interp * 0.2; + + float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + + ivec3 probe_cell; + probe_cell.x = int(probe_index % params.probe_axis_size); + probe_cell.y = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); + probe_cell.z = int((probe_index / params.probe_axis_size) % params.probe_axis_size); + + vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); + + gl_Position = params.projection * vec4(vertex, 1.0); +#endif + +#ifdef MODE_VISIBILITY + + int probe_index = int(params.probe_debug_index); + + vec3 vertex = get_sphere_vertex(gl_VertexIndex) * 0.01; + + float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + + ivec3 probe_cell; + probe_cell.x = int(probe_index % params.probe_axis_size); + probe_cell.y = int((probe_index % (params.probe_axis_size * params.probe_axis_size)) / params.probe_axis_size); + probe_cell.z = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); + + vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); + + int probe_voxels = int(params.grid_size.x) / int(params.probe_axis_size - 1); + int occluder_index = int(gl_InstanceIndex); + + int diameter = probe_voxels * 2; + ivec3 occluder_pos; + occluder_pos.x = int(occluder_index % diameter); + occluder_pos.y = int(occluder_index / (diameter * diameter)); + occluder_pos.z = int((occluder_index / diameter) % diameter); + + float cell_size = 1.0 / cascades.data[params.cascade].to_cell; + + ivec3 occluder_offset = occluder_pos - ivec3(diameter / 2); + vertex += ((vec3(occluder_offset) + vec3(0.5)) * cell_size) / vec3(1.0, params.y_mult, 1.0); + + ivec3 global_cell = probe_cell + cascades.data[params.cascade].probe_world_offset; + uint occlusion_layer = 0; + if ((global_cell.x & 1) != 0) { + occlusion_layer |= 1; + } + if ((global_cell.y & 1) != 0) { + occlusion_layer |= 2; + } + if ((global_cell.z & 1) != 0) { + occlusion_layer |= 4; + } + ivec3 tex_pos = probe_cell * probe_voxels + occluder_offset; + + const vec4 layer_axis[4] = vec4[]( + vec4(1, 0, 0, 0), + vec4(0, 1, 0, 0), + vec4(0, 0, 1, 0), + vec4(0, 0, 0, 1)); + + tex_pos.z += int(params.cascade) * int(params.grid_size); + if (occlusion_layer >= 4) { + tex_pos.x += int(params.grid_size.x); + occlusion_layer &= 3; + } + + visibility = dot(texelFetch(sampler3D(occlusion_texture, linear_sampler), tex_pos, 0), layer_axis[occlusion_layer]); + + gl_Position = params.projection * vec4(vertex, 1.0); + +#endif +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) out vec4 frag_color; + +layout(set = 0, binding = 2) uniform texture2DArray lightprobe_texture; +layout(set = 0, binding = 3) uniform sampler linear_sampler; + +layout(push_constant, binding = 0, std430) uniform Params { + mat4 projection; + + uint band_power; + uint sections_in_band; + uint band_mask; + float section_arc; + + vec3 grid_size; + uint cascade; + + uint pad; + float y_mult; + uint probe_debug_index; + int probe_axis_size; +} +params; + +#ifdef MODE_PROBES + +layout(location = 0) in vec3 normal_interp; +layout(location = 1) in flat uint probe_index; + +#endif + +#ifdef MODE_VISIBILITY +layout(location = 0) in float visibility; +#endif + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +void main() { +#ifdef MODE_PROBES + + ivec3 tex_pos; + tex_pos.x = int(probe_index) % params.probe_axis_size; //x + tex_pos.y = int(probe_index) / (params.probe_axis_size * params.probe_axis_size); + tex_pos.x += params.probe_axis_size * ((int(probe_index) / params.probe_axis_size) % params.probe_axis_size); //z + tex_pos.z = int(params.cascade); + + vec3 tex_pos_ofs = vec3(octahedron_encode(normal_interp) * float(OCT_SIZE), 0.0); + vec3 tex_posf = vec3(vec2(tex_pos.xy * (OCT_SIZE + 2) + ivec2(1)), float(tex_pos.z)) + tex_pos_ofs; + + tex_posf.xy /= vec2(ivec2(params.probe_axis_size * params.probe_axis_size * (OCT_SIZE + 2), params.probe_axis_size * (OCT_SIZE + 2))); + + vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), tex_posf, 0.0); + + frag_color = indirect_light; + +#endif + +#ifdef MODE_VISIBILITY + + frag_color = vec4(vec3(1, visibility, visibility), 1.0); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl new file mode 100644 index 0000000000..dc7238abed --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl @@ -0,0 +1,508 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform sampler linear_sampler; + +layout(set = 0, binding = 3, std430) restrict readonly buffer DispatchData { + uint x; + uint y; + uint z; + uint total_count; +} +dispatch_data; + +struct ProcessVoxel { + uint position; //xyz 7 bit packed, extra 11 bits for neigbours + uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours + uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours + //total neighbours: 26 +}; + +#ifdef MODE_PROCESS_STATIC +layout(set = 0, binding = 4, std430) restrict buffer ProcessVoxels { +#else +layout(set = 0, binding = 4, std430) restrict buffer readonly ProcessVoxels { +#endif + ProcessVoxel data[]; +} +process_voxels; + +layout(r32ui, set = 0, binding = 5) uniform restrict uimage3D dst_light; +layout(rgba8, set = 0, binding = 6) uniform restrict image3D dst_aniso0; +layout(rg8, set = 0, binding = 7) uniform restrict image3D dst_aniso1; + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 8, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +#define LIGHT_TYPE_DIRECTIONAL 0 +#define LIGHT_TYPE_OMNI 1 +#define LIGHT_TYPE_SPOT 2 + +struct Light { + vec3 color; + float energy; + + vec3 direction; + bool has_shadow; + + vec3 position; + float attenuation; + + uint type; + float cos_spot_angle; + float inv_spot_attenuation; + float radius; + + vec4 shadow_color; +}; + +layout(set = 0, binding = 9, std140) buffer restrict readonly Lights { + Light data[]; +} +lights; + +layout(set = 0, binding = 10) uniform texture2DArray lightprobe_texture; +layout(set = 0, binding = 11) uniform texture3D occlusion_texture; + +layout(push_constant, binding = 0, std430) uniform Params { + vec3 grid_size; + uint max_cascades; + + uint cascade; + uint light_count; + uint process_offset; + uint process_increment; + + int probe_axis_size; + float bounce_feedback; + float y_mult; + bool use_occlusion; +} +params; + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +void main() { + uint voxel_index = uint(gl_GlobalInvocationID.x); + + //used for skipping voxels every N frames + if (params.process_increment > 1) { + voxel_index *= params.process_increment; + voxel_index += params.process_offset; + } + + if (voxel_index >= dispatch_data.total_count) { + return; + } + + uint voxel_position = process_voxels.data[voxel_index].position; + + //keep for storing to texture + ivec3 positioni = ivec3((uvec3(voxel_position, voxel_position, voxel_position) >> uvec3(0, 7, 14)) & uvec3(0x7F)); + + vec3 position = vec3(positioni) + vec3(0.5); + position /= cascades.data[params.cascade].to_cell; + position += cascades.data[params.cascade].offset; + + uint voxel_albedo = process_voxels.data[voxel_index].albedo; + + vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F); + vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); + uint valid_aniso = (voxel_albedo >> 15) & 0x3F; + + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + // Add indirect light first, in order to save computation resources +#ifdef MODE_PROCESS_DYNAMIC + if (params.bounce_feedback > 0.001) { + vec3 feedback = (params.bounce_feedback < 1.0) ? (albedo * params.bounce_feedback) : mix(albedo, vec3(1.0), params.bounce_feedback - 1.0); + vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; + ivec3 probe_base_pos = ivec3(pos); + + float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); + tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); + + tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); + + vec3 base_tex_posf = vec3(tex_pos); + vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); + vec3 probe_uv_offset = vec3(ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + // Compute lightprobe texture position + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + + for (uint k = 0; k < 6; k++) { + if (bool(valid_aniso & (1 << k))) { + vec3 n = aniso_dir[k]; + float weight = trilinear.x * trilinear.y * trilinear.z * max(0, dot(n, probe_dir)); + + if (weight > 0.0 && params.use_occlusion) { + ivec3 occ_indexv = abs((cascades.data[params.cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = (vec3(positioni) + aniso_dir[k] + vec3(0.5)) / params.grid_size; + occ_pos.z += float(params.cascade); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + occ_pos *= vec3(0.5, 1.0, 1.0 / float(params.max_cascades)); //renormalize + float occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= occlusion; + } + + if (weight > 0.0) { + vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); + tex_posf.xy *= tex_pixel_size; + + vec3 pos_uvw = tex_posf; + pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; + pos_uvw.x += float(offset.z) * probe_uv_offset.z; + vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + light_accum[k] += indirect_light * weight; + weight_accum[k] += weight; + } + } + } + } + + for (uint k = 0; k < 6; k++) { + if (weight_accum[k] > 0.0) { + light_accum[k] /= weight_accum[k]; + light_accum[k] *= feedback; + } + } + } + +#endif + + { + uint rgbe = process_voxels.data[voxel_index].light; + + //read rgbe8985 + float r = float((rgbe & 0xff) << 1); + float g = float((rgbe >> 8) & 0x1ff); + float b = float(((rgbe >> 17) & 0xff) << 1); + float e = float((rgbe >> 25) & 0x1F); + float m = pow(2.0, e - 15.0 - 9.0); + + vec3 l = vec3(r, g, b) * m; + + uint aniso = process_voxels.data[voxel_index].light_aniso; + for (uint i = 0; i < 6; i++) { + float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F); + light_accum[i] += l * strength; + } + } + + // Raytrace light + + vec3 pos_to_uvw = 1.0 / params.grid_size; + vec3 uvw_ofs = pos_to_uvw * 0.5; + + for (uint i = 0; i < params.light_count; i++) { + float attenuation = 1.0; + vec3 direction; + float light_distance = 1e20; + + switch (lights.data[i].type) { + case LIGHT_TYPE_DIRECTIONAL: { + direction = -lights.data[i].direction; + } break; + case LIGHT_TYPE_OMNI: { + vec3 rel_vec = lights.data[i].position - position; + direction = normalize(rel_vec); + light_distance = length(rel_vec); + rel_vec.y /= params.y_mult; + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); + + } break; + case LIGHT_TYPE_SPOT: { + vec3 rel_vec = lights.data[i].position - position; + direction = normalize(rel_vec); + light_distance = length(rel_vec); + rel_vec.y /= params.y_mult; + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); + + float cos_spot_angle = lights.data[i].cos_spot_angle; + float cos_angle = dot(-direction, lights.data[i].direction); + + if (cos_angle < cos_spot_angle) { + continue; + } + + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[i].inv_spot_attenuation); + } break; + } + + if (attenuation < 0.001) { + continue; + } + + bool hit = false; + + vec3 ray_pos = position; + vec3 ray_dir = direction; + vec3 inv_dir = 1.0 / ray_dir; + + //this is how to properly bias outgoing rays + float cell_size = 1.0 / cascades.data[params.cascade].to_cell; + ray_pos += sign(direction) * cell_size * 0.48; // go almost to the box edge but remain inside + ray_pos += ray_dir * 0.4 * cell_size; //apply a small bias from there + + for (uint j = params.cascade; j < params.max_cascades; j++) { + //convert to local bounds + vec3 pos = ray_pos - cascades.data[j].offset; + pos *= cascades.data[j].to_cell; + float local_distance = light_distance * cascades.data[j].to_cell; + + if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { + continue; //already past bounds for this cascade, goto next + } + + //find maximum advance distance (until reaching bounds) + vec3 t0 = -pos * inv_dir; + vec3 t1 = (params.grid_size - pos) * inv_dir; + vec3 tmax = max(t0, t1); + float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + + max_advance = min(local_distance, max_advance); + + float advance = 0.0; + float occlusion = 1.0; + + while (advance < max_advance) { + //read how much to advance from SDF + vec3 uvw = (pos + ray_dir * advance) * pos_to_uvw; + + float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; + if (distance < 0.001) { + //consider hit + hit = true; + break; + } + + occlusion = min(occlusion, distance); + + advance += distance; + } + + if (hit) { + attenuation *= occlusion; + break; + } + + if (advance >= local_distance) { + break; //past light distance, abandon search + } + //change ray origin to collision with bounds + pos += ray_dir * max_advance; + pos /= cascades.data[j].to_cell; + pos += cascades.data[j].offset; + light_distance -= max_advance / cascades.data[j].to_cell; + ray_pos = pos; + } + + if (!hit) { + vec3 light = albedo * lights.data[i].color.rgb * lights.data[i].energy * attenuation; + + for (int j = 0; j < 6; j++) { + if (bool(valid_aniso & (1 << j))) { + light_accum[j] += max(0.0, dot(aniso_dir[j], direction)) * light; + } + } + } + } + + // Store the light in the light texture + + float lumas[6]; + vec3 light_total = vec3(0); + + for (int i = 0; i < 6; i++) { + light_total += light_accum[i]; + lumas[i] = max(light_accum[i].r, max(light_accum[i].g, light_accum[i].b)); + } + + float luma_total = max(light_total.r, max(light_total.g, light_total.b)); + + uint light_total_rgbe; + + { + //compress to RGBE9995 to save space + + const float pow2to9 = 512.0f; + const float B = 15.0f; + const float N = 9.0f; + const float LN2 = 0.6931471805599453094172321215; + + float cRed = clamp(light_total.r, 0.0, 65408.0); + float cGreen = clamp(light_total.g, 0.0, 65408.0); + float cBlue = clamp(light_total.b, 0.0, 65408.0); + + float cMax = max(cRed, max(cGreen, cBlue)); + + float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + + float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + + float exps = expp + 1.0f; + + if (0.0 <= sMax && sMax < pow2to9) { + exps = expp; + } + + float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); + float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); + float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); +#ifdef MODE_PROCESS_STATIC + //since its self-save, use RGBE8985 + light_total_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); + +#else + light_total_rgbe = (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); +#endif + } + +#ifdef MODE_PROCESS_DYNAMIC + + vec4 aniso0; + aniso0.r = lumas[0] / luma_total; + aniso0.g = lumas[1] / luma_total; + aniso0.b = lumas[2] / luma_total; + aniso0.a = lumas[3] / luma_total; + + vec2 aniso1; + aniso1.r = lumas[4] / luma_total; + aniso1.g = lumas[5] / luma_total; + + //save to 3D textures + imageStore(dst_aniso0, positioni, aniso0); + imageStore(dst_aniso1, positioni, vec4(aniso1, 0.0, 0.0)); + imageStore(dst_light, positioni, uvec4(light_total_rgbe)); + + //also fill neighbours, so light interpolation during the indirect pass works + + //recover the neighbour list from the leftover bits + uint neighbours = (voxel_albedo >> 21) | ((voxel_position >> 21) << 11) | ((process_voxels.data[voxel_index].light >> 30) << 22) | ((process_voxels.data[voxel_index].light_aniso >> 30) << 24); + + const uint max_neighbours = 26; + const ivec3 neighbour_positions[max_neighbours] = ivec3[]( + ivec3(-1, -1, -1), + ivec3(-1, -1, 0), + ivec3(-1, -1, 1), + ivec3(-1, 0, -1), + ivec3(-1, 0, 0), + ivec3(-1, 0, 1), + ivec3(-1, 1, -1), + ivec3(-1, 1, 0), + ivec3(-1, 1, 1), + ivec3(0, -1, -1), + ivec3(0, -1, 0), + ivec3(0, -1, 1), + ivec3(0, 0, -1), + ivec3(0, 0, 1), + ivec3(0, 1, -1), + ivec3(0, 1, 0), + ivec3(0, 1, 1), + ivec3(1, -1, -1), + ivec3(1, -1, 0), + ivec3(1, -1, 1), + ivec3(1, 0, -1), + ivec3(1, 0, 0), + ivec3(1, 0, 1), + ivec3(1, 1, -1), + ivec3(1, 1, 0), + ivec3(1, 1, 1)); + + for (uint i = 0; i < max_neighbours; i++) { + if (bool(neighbours & (1 << i))) { + ivec3 neighbour_pos = positioni + neighbour_positions[i]; + imageStore(dst_light, neighbour_pos, uvec4(light_total_rgbe)); + imageStore(dst_aniso0, neighbour_pos, aniso0); + imageStore(dst_aniso1, neighbour_pos, vec4(aniso1, 0.0, 0.0)); + } + } + +#endif + +#ifdef MODE_PROCESS_STATIC + + //save back the anisotropic + + uint light = process_voxels.data[voxel_index].light & (3 << 30); + light |= light_total_rgbe; + process_voxels.data[voxel_index].light = light; //replace + + uint light_aniso = process_voxels.data[voxel_index].light_aniso & (3 << 30); + for (int i = 0; i < 6; i++) { + light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); + } + + process_voxels.data[voxel_index].light_aniso = light_aniso; + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl new file mode 100644 index 0000000000..69d8824d8a --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl @@ -0,0 +1,182 @@ +/* clang-format off */ +[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = OCT_RES, local_size_y = OCT_RES, local_size_z = 1) in; + +/* clang-format on */ + +#define MAX_CASCADES 8 + +layout(rgba16f, set = 0, binding = 1) uniform restrict image2DArray irradiance_texture; +layout(rg16f, set = 0, binding = 2) uniform restrict image2DArray depth_texture; + +layout(rgba32ui, set = 0, binding = 3) uniform restrict uimage2DArray irradiance_history_texture; +layout(rg32ui, set = 0, binding = 4) uniform restrict uimage2DArray depth_history_texture; + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size +}; + +layout(set = 0, binding = 5, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +#define DEPTH_HISTORY_BITS 24 +#define IRRADIANCE_HISTORY_BITS 16 + +layout(push_constant, binding = 0, std430) uniform Params { + vec3 grid_size; + uint max_cascades; + + uint probe_axis_size; + uint cascade; + uint history_size; + uint pad0; + + ivec3 scroll; //scroll in probes + uint pad1; +} +params; + +void main() { + ivec2 local = ivec2(gl_LocalInvocationID.xy); + ivec2 probe = ivec2(gl_WorkGroupID.xy); + + ivec3 probe_cell; + probe_cell.x = probe.x % int(params.probe_axis_size); + probe_cell.y = probe.y; + probe_cell.z = probe.x / int(params.probe_axis_size); + +#ifdef MODE_SCROLL_BEGIN + + ivec3 read_cell = probe_cell - params.scroll; + + uint src_layer = (params.history_size + 1) * params.cascade; + uint dst_layer = (params.history_size + 1) * params.max_cascades; + + for (uint i = 0; i <= params.history_size; i++) { + ivec3 write_pos = ivec3(probe * OCT_RES + local, int(i)); + + if (any(lessThan(read_pos, ivec3(0))) || any(greaterThanEqual(read_pos, ivec3(params.probe_axis_size)))) { + // nowhere to read from for scrolling, try finding the value from upper probes + +#ifdef MODE_IRRADIANCE + imageStore(irradiance_history_texture, write_pos, uvec4(0)); +#endif +#ifdef MODE_DEPTH + imageStore(depth_history_texture, write_pos, uvec4(0)); +#endif + } else { + ivec3 read_pos; + read_pos.xy = read_cell.xy; + read_pos.x += read_cell.z * params.probe_axis_size; + read_pos.xy = read_pos.xy * OCT_RES + local; + read_pos.z = int(i); + +#ifdef MODE_IRRADIANCE + uvec4 value = imageLoad(irradiance_history_texture, read_pos); + imageStore(irradiance_history_texture, write_pos, value); +#endif +#ifdef MODE_DEPTH + uvec2 value = imageLoad(depth_history_texture, read_pos); + imageStore(depth_history_texture, write_pos, value); +#endif + } + } + +#endif // MODE_SCROLL_BEGIN + +#ifdef MODE_SCROLL_END + + uint src_layer = (params.history_size + 1) * params.max_cascades; + uint dst_layer = (params.history_size + 1) * params.cascade; + + for (uint i = 0; i <= params.history_size; i++) { + ivec3 pos = ivec3(probe * OCT_RES + local, int(i)); + +#ifdef MODE_IRRADIANCE + uvec4 value = imageLoad(irradiance_history_texture, read_pos); + imageStore(irradiance_history_texture, write_pos, value); +#endif +#ifdef MODE_DEPTH + uvec2 value = imageLoad(depth_history_texture, read_pos); + imageStore(depth_history_texture, write_pos, value); +#endif + } + +#endif //MODE_SCROLL_END + +#ifdef MODE_STORE + + uint src_layer = (params.history_size + 1) * params.cascade + params.history_size; + ivec3 read_pos = ivec3(probe * OCT_RES + local, int(src_layer)); + + ivec3 write_pos = ivec3(probe * (OCT_RES + 2) + ivec2(1), int(params.cascade)); + + ivec3 copy_to[4] = ivec3[](write_pos, ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2)); + +#ifdef MODE_IRRADIANCE + uvec4 average = imageLoad(irradiance_history_texture, read_pos); + vec4 light_accum = vec4(average / params.history_size) / float(1 << IRRADIANCE_HISTORY_BITS); + +#endif +#ifdef MODE_DEPTH + uvec2 value = imageLoad(depth_history_texture, read_pos); + vec2 depth_accum = vec4(average / params.history_size) / float(1 << IRRADIANCE_HISTORY_BITS); + + float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + float max_depth = length(params.grid_size / cascades.data[params.max_cascades - 1].to_cell); + max_depth /= probe_cell_size; + + depth_value = (vec2(average / params.history_size) / float(1 << DEPTH_HISTORY_BITS)) * vec2(max_depth, max_depth * max_depth); + +#endif + + /* Fill the border if required */ + + if (local == ivec2(0, 0)) { + copy_to[1] = texture_pos + ivec3(OCT_RES - 1, -1, 0); + copy_to[2] = texture_pos + ivec3(-1, OCT_RES - 1, 0); + copy_to[3] = texture_pos + ivec3(OCT_RES, OCT_RES, 0); + } else if (local == ivec2(OCT_RES - 1, 0)) { + copy_to[1] = texture_pos + ivec3(0, -1, 0); + copy_to[2] = texture_pos + ivec3(OCT_RES, OCT_RES - 1, 0); + copy_to[3] = texture_pos + ivec3(-1, OCT_RES, 0); + } else if (local == ivec2(0, OCT_RES - 1)) { + copy_to[1] = texture_pos + ivec3(-1, 0, 0); + copy_to[2] = texture_pos + ivec3(OCT_RES - 1, OCT_RES, 0); + copy_to[3] = texture_pos + ivec3(OCT_RES, -1, 0); + } else if (local == ivec2(OCT_RES - 1, OCT_RES - 1)) { + copy_to[1] = texture_pos + ivec3(0, OCT_RES, 0); + copy_to[2] = texture_pos + ivec3(OCT_RES, 0, 0); + copy_to[3] = texture_pos + ivec3(-1, -1, 0); + } else if (local.y == 0) { + copy_to[1] = texture_pos + ivec3(OCT_RES - local.x - 1, local.y - 1, 0); + } else if (local.x == 0) { + copy_to[1] = texture_pos + ivec3(local.x - 1, OCT_RES - local.y - 1, 0); + } else if (local.y == OCT_RES - 1) { + copy_to[1] = texture_pos + ivec3(OCT_RES - local.x - 1, local.y + 1, 0); + } else if (local.x == OCT_RES - 1) { + copy_to[1] = texture_pos + ivec3(local.x + 1, OCT_RES - local.y - 1, 0); + } + + for (int i = 0; i < 4; i++) { + if (copy_to[i] == ivec3(-2, -2, -2)) { + continue; + } +#ifdef MODE_IRRADIANCE + imageStore(irradiance_texture, copy_to[i], light_accum); +#endif +#ifdef MODE_DEPTH + imageStore(depth_texture, copy_to[i], vec4(depth_value, 0.0, 0.0)); +#endif + } + +#endif // MODE_STORE +} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl new file mode 100644 index 0000000000..007e4c113a --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl @@ -0,0 +1,612 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; + +layout(set = 0, binding = 6) uniform sampler linear_sampler; + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 7, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(r32ui, set = 0, binding = 8) uniform restrict uimage2DArray lightprobe_texture_data; +layout(rgba16i, set = 0, binding = 9) uniform restrict iimage2DArray lightprobe_history_texture; +layout(rgba32i, set = 0, binding = 10) uniform restrict iimage2D lightprobe_average_texture; + +//used for scrolling +layout(rgba16i, set = 0, binding = 11) uniform restrict iimage2DArray lightprobe_history_scroll_texture; +layout(rgba32i, set = 0, binding = 12) uniform restrict iimage2D lightprobe_average_scroll_texture; + +layout(rgba32i, set = 0, binding = 13) uniform restrict iimage2D lightprobe_average_parent_texture; + +layout(rgba16f, set = 0, binding = 14) uniform restrict writeonly image2DArray lightprobe_ambient_texture; + +#ifdef USE_CUBEMAP_ARRAY +layout(set = 1, binding = 0) uniform textureCubeArray sky_irradiance; +#else +layout(set = 1, binding = 0) uniform textureCube sky_irradiance; +#endif +layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; + +#define HISTORY_BITS 10 + +#define SKY_MODE_DISABLED 0 +#define SKY_MODE_COLOR 1 +#define SKY_MODE_SKY 2 + +layout(push_constant, binding = 0, std430) uniform Params { + vec3 grid_size; + uint max_cascades; + + uint probe_axis_size; + uint cascade; + uint history_index; + uint history_size; + + uint ray_count; + float ray_bias; + ivec2 image_size; + + ivec3 world_offset; + uint sky_mode; + + ivec3 scroll; + float sky_energy; + + vec3 sky_color; + float y_mult; + + bool store_ambient_texture; + uint pad[3]; +} +params; + +const float PI = 3.14159265f; +const float GOLDEN_ANGLE = PI * (3.0 - sqrt(5.0)); + +vec3 vogel_hemisphere(uint p_index, uint p_count, float p_offset) { + float r = sqrt(float(p_index) + 0.5f) / sqrt(float(p_count)); + float theta = float(p_index) * GOLDEN_ANGLE + p_offset; + float y = cos(r * PI * 0.5); + float l = sin(r * PI * 0.5); + return vec3(l * cos(theta), l * sin(theta), y * (float(p_index & 1) * 2.0 - 1.0)); +} + +uvec3 hash3(uvec3 x) { + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + return x; +} + +float hashf3(vec3 co) { + return fract(sin(dot(co, vec3(12.9898, 78.233, 137.13451))) * 43758.5453); +} + +vec3 octahedron_encode(vec2 f) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + f = f * 2.0 - 1.0; + vec3 n = vec3(f.x, f.y, 1.0f - abs(f.x) - abs(f.y)); + float t = clamp(-n.z, 0.0, 1.0); + n.x += n.x >= 0 ? -t : t; + n.y += n.y >= 0 ? -t : t; + return normalize(n); +} + +uint rgbe_encode(vec3 color) { + const float pow2to9 = 512.0f; + const float B = 15.0f; + const float N = 9.0f; + const float LN2 = 0.6931471805599453094172321215; + + float cRed = clamp(color.r, 0.0, 65408.0); + float cGreen = clamp(color.g, 0.0, 65408.0); + float cBlue = clamp(color.b, 0.0, 65408.0); + + float cMax = max(cRed, max(cGreen, cBlue)); + + float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + + float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + + float exps = expp + 1.0f; + + if (0.0 <= sMax && sMax < pow2to9) { + exps = expp; + } + + float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); + float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); + float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); + return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); +} + +struct SH { +#if (SH_SIZE == 16) + float c[48]; +#else + float c[28]; +#endif +}; + +shared SH sh_accum[64]; //8x8 + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing + return; + } + + uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8; + +#ifdef MODE_PROCESS + + float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + + ivec3 probe_cell; + probe_cell.x = pos.x % int(params.probe_axis_size); + probe_cell.y = pos.y; + probe_cell.z = pos.x / int(params.probe_axis_size); + + vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; + vec3 pos_to_uvw = 1.0 / params.grid_size; + + for (uint i = 0; i < SH_SIZE * 3; i++) { + sh_accum[probe_index].c[i] = 0.0; + } + + // quickly ensure each probe has a different "offset" for the vogel function, based on integer world position + uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell)); + float offset = hashf3(vec3(h3 & uvec3(0xFFFFF))); + + //for a more homogeneous hemisphere, alternate based on history frames + uint ray_offset = params.history_index; + uint ray_mult = params.history_size; + uint ray_total = ray_mult * params.ray_count; + + for (uint i = 0; i < params.ray_count; i++) { + vec3 ray_dir = vogel_hemisphere(ray_offset + i * ray_mult, ray_total, offset); + ray_dir.y *= params.y_mult; + ray_dir = normalize(ray_dir); + + //needs to be visible + vec3 ray_pos = probe_pos; + vec3 inv_dir = 1.0 / ray_dir; + + bool hit = false; + uint hit_cascade; + + float bias = params.ray_bias; + vec3 abs_ray_dir = abs(ray_dir); + ray_pos += ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) * bias / cascades.data[params.cascade].to_cell; + vec3 uvw; + + for (uint j = params.cascade; j < params.max_cascades; j++) { + //convert to local bounds + vec3 pos = ray_pos - cascades.data[j].offset; + pos *= cascades.data[j].to_cell; + + if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { + continue; //already past bounds for this cascade, goto next + } + + //find maximum advance distance (until reaching bounds) + vec3 t0 = -pos * inv_dir; + vec3 t1 = (params.grid_size - pos) * inv_dir; + vec3 tmax = max(t0, t1); + float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + + float advance = 0.0; + + while (advance < max_advance) { + //read how much to advance from SDF + uvw = (pos + ray_dir * advance) * pos_to_uvw; + + float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; + if (distance < 0.05) { + //consider hit + hit = true; + break; + } + + advance += distance; + } + + if (hit) { + hit_cascade = j; + break; + } + + //change ray origin to collision with bounds + pos += ray_dir * max_advance; + pos /= cascades.data[j].to_cell; + pos += cascades.data[j].offset; + ray_pos = pos; + } + + vec4 light; + if (hit) { + //avoid reading different texture from different threads + for (uint j = params.cascade; j < params.max_cascades; j++) { + if (j == hit_cascade) { + const float EPSILON = 0.001; + vec3 hit_normal = normalize(vec3( + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + + vec3 hit_light = texture(sampler3D(light_cascades[hit_cascade], linear_sampler), uvw).rgb; + vec4 aniso0 = texture(sampler3D(aniso0_cascades[hit_cascade], linear_sampler), uvw); + vec3 hit_aniso0 = aniso0.rgb; + vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[hit_cascade], linear_sampler), uvw).rg); + + //one liner magic + light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); + light.a = 1.0; + } + } + + } else if (params.sky_mode == SKY_MODE_SKY) { +#ifdef USE_CUBEMAP_ARRAY + light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(ray_dir, 0.0), 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates +#else + light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), ray_dir, 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates +#endif + light.rgb *= params.sky_energy; + light.a = 0.0; + + } else if (params.sky_mode == SKY_MODE_COLOR) { + light.rgb = params.sky_color; + light.rgb *= params.sky_energy; + light.a = 0.0; + } else { + light = vec4(0, 0, 0, 0); + } + + vec3 ray_dir2 = ray_dir * ray_dir; + +#define SH_ACCUM(m_idx, m_value) \ + { \ + vec3 l = light.rgb * (m_value); \ + sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \ + sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \ + sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \ + } + SH_ACCUM(0, 0.282095); //l0 + SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1 + SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0 + SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1 + SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2 + SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1 + SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20 + SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1 + SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2 +#if (SH_SIZE == 16) + SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y)); + SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z); + SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z)); + SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z); + SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)); + +#endif + } + + for (uint i = 0; i < SH_SIZE; i++) { + // store in history texture + ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index)); + ivec2 average_pos = prev_pos.xy; + + vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count); + + ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average + + ivec4 prev_value = imageLoad(lightprobe_history_texture, prev_pos); + ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + + average -= prev_value; + average += ivalue; + + imageStore(lightprobe_history_texture, prev_pos, ivalue); + imageStore(lightprobe_average_texture, average_pos, average); + + if (params.store_ambient_texture && i == 0) { + ivec3 ambient_pos = ivec3(pos, int(params.cascade)); + vec4 ambient_light = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + ambient_light *= 0.88622; // SHL0 + imageStore(lightprobe_ambient_texture, ambient_pos, ambient_light); + } + } +#endif // MODE PROCESS + +#ifdef MODE_STORE + + // converting to octahedral in this step is required because + // octahedral is much faster to read from the screen than spherical harmonics, + // despite the very slight quality loss + + ivec2 sh_pos = (pos / OCT_SIZE) * ivec2(1, SH_SIZE); + ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1); + ivec2 local_pos = pos % OCT_SIZE; + + //compute the octahedral normal for this texel + vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE)); + + // read the spherical harmonic + + vec3 normal2 = normal * normal; + float c[SH_SIZE] = float[]( + + 0.282095, //l0 + 0.488603 * normal.y, //l1n1 + 0.488603 * normal.z, //l1n0 + 0.488603 * normal.x, //l1p1 + 1.092548 * normal.x * normal.y, //l2n2 + 1.092548 * normal.y * normal.z, //l2n1 + 0.315392 * (3.0 * normal2.z - 1.0), //l20 + 1.092548 * normal.x * normal.z, //l2p1 + 0.546274 * (normal2.x - normal2.y) //l2p2 +#if (SH_SIZE == 16) + , + 0.590043 * normal.y * (3.0f * normal2.x - normal2.y), + 2.890611 * normal.y * normal.x * normal.z, + 0.646360 * normal.y * (-1.0f + 5.0f * normal2.z), + 0.373176 * (5.0f * normal2.z * normal.z - 3.0f * normal.z), + 0.457045 * normal.x * (-1.0f + 5.0f * normal2.z), + 1.445305 * (normal2.x - normal2.y) * normal.z, + 0.590043 * normal.x * (normal2.x - 3.0f * normal2.y) + +#endif + ); + + const float l_mult[SH_SIZE] = float[]( + 1.0, + 2.0 / 3.0, + 2.0 / 3.0, + 2.0 / 3.0, + 1.0 / 4.0, + 1.0 / 4.0, + 1.0 / 4.0, + 1.0 / 4.0, + 1.0 / 4.0 +#if (SH_SIZE == 16) + , // l4 does not contribute to irradiance + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 +#endif + ); + + vec3 irradiance = vec3(0.0); + vec3 radiance = vec3(0.0); + + for (uint i = 0; i < SH_SIZE; i++) { + // store in history texture + ivec2 average_pos = sh_pos + ivec2(0, i); + ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + + vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + + vec3 m = sh.rgb * c[i] * 4.0; + + irradiance += m * l_mult[i]; + radiance += m; + } + + //encode RGBE9995 for the final texture + + uint irradiance_rgbe = rgbe_encode(irradiance); + uint radiance_rgbe = rgbe_encode(radiance); + + //store in octahedral map + + ivec3 texture_pos = ivec3(oct_pos, int(params.cascade)); + ivec3 copy_to[4] = ivec3[](ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2)); + copy_to[0] = texture_pos + ivec3(local_pos, 0); + + if (local_pos == ivec2(0, 0)) { + copy_to[1] = texture_pos + ivec3(OCT_SIZE - 1, -1, 0); + copy_to[2] = texture_pos + ivec3(-1, OCT_SIZE - 1, 0); + copy_to[3] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE, 0); + } else if (local_pos == ivec2(OCT_SIZE - 1, 0)) { + copy_to[1] = texture_pos + ivec3(0, -1, 0); + copy_to[2] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE - 1, 0); + copy_to[3] = texture_pos + ivec3(-1, OCT_SIZE, 0); + } else if (local_pos == ivec2(0, OCT_SIZE - 1)) { + copy_to[1] = texture_pos + ivec3(-1, 0, 0); + copy_to[2] = texture_pos + ivec3(OCT_SIZE - 1, OCT_SIZE, 0); + copy_to[3] = texture_pos + ivec3(OCT_SIZE, -1, 0); + } else if (local_pos == ivec2(OCT_SIZE - 1, OCT_SIZE - 1)) { + copy_to[1] = texture_pos + ivec3(0, OCT_SIZE, 0); + copy_to[2] = texture_pos + ivec3(OCT_SIZE, 0, 0); + copy_to[3] = texture_pos + ivec3(-1, -1, 0); + } else if (local_pos.y == 0) { + copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, local_pos.y - 1, 0); + } else if (local_pos.x == 0) { + copy_to[1] = texture_pos + ivec3(local_pos.x - 1, OCT_SIZE - local_pos.y - 1, 0); + } else if (local_pos.y == OCT_SIZE - 1) { + copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, local_pos.y + 1, 0); + } else if (local_pos.x == OCT_SIZE - 1) { + copy_to[1] = texture_pos + ivec3(local_pos.x + 1, OCT_SIZE - local_pos.y - 1, 0); + } + + for (int i = 0; i < 4; i++) { + if (copy_to[i] == ivec3(-2, -2, -2)) { + continue; + } + imageStore(lightprobe_texture_data, copy_to[i], uvec4(irradiance_rgbe)); + imageStore(lightprobe_texture_data, copy_to[i] + ivec3(0, 0, int(params.max_cascades)), uvec4(radiance_rgbe)); + } + +#endif + +#ifdef MODE_SCROLL + + ivec3 probe_cell; + probe_cell.x = pos.x % int(params.probe_axis_size); + probe_cell.y = pos.y; + probe_cell.z = pos.x / int(params.probe_axis_size); + + ivec3 read_probe = probe_cell - params.scroll; + + if (all(greaterThanEqual(read_probe, ivec3(0))) && all(lessThan(read_probe, ivec3(params.probe_axis_size)))) { + // can scroll + ivec2 tex_pos; + tex_pos = read_probe.xy; + tex_pos.x += read_probe.z * int(params.probe_axis_size); + + //scroll + for (uint j = 0; j < params.history_size; j++) { + for (int i = 0; i < SH_SIZE; i++) { + // copy from history texture + ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); + ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); + ivec4 value = imageLoad(lightprobe_history_texture, src_pos); + imageStore(lightprobe_history_scroll_texture, dst_pos, value); + } + } + + for (int i = 0; i < SH_SIZE; i++) { + // copy from average texture + ivec2 src_pos = ivec2(tex_pos.x, tex_pos.y * SH_SIZE + i); + ivec2 dst_pos = ivec2(pos.x, pos.y * SH_SIZE + i); + ivec4 value = imageLoad(lightprobe_average_texture, src_pos); + imageStore(lightprobe_average_scroll_texture, dst_pos, value); + } + } else if (params.cascade < params.max_cascades - 1) { + //can't scroll, must look for position in parent cascade + + //to global coords + float cell_to_probe = float(params.grid_size.x / float(params.probe_axis_size - 1)); + + float probe_cell_size = cell_to_probe / cascades.data[params.cascade].to_cell; + vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; + + //to parent local coords + float probe_cell_size_next = cell_to_probe / cascades.data[params.cascade + 1].to_cell; + probe_pos -= cascades.data[params.cascade + 1].offset; + probe_pos /= probe_cell_size_next; + + ivec3 probe_posi = ivec3(probe_pos); + //add up all light, no need to use occlusion here, since occlusion will do its work afterwards + + vec4 average_light[SH_SIZE] = vec4[](vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) +#if (SH_SIZE == 16) + , + vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) +#endif + ); + float total_weight = 0.0; + + for (int i = 0; i < 8; i++) { + ivec3 offset = probe_posi + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + + vec3 trilinear = vec3(1.0) - abs(probe_pos - vec3(offset)); + float weight = trilinear.x * trilinear.y * trilinear.z; + + ivec2 tex_pos; + tex_pos = offset.xy; + tex_pos.x += offset.z * int(params.probe_axis_size); + + for (int j = 0; j < SH_SIZE; j++) { + // copy from history texture + ivec2 src_pos = ivec2(tex_pos.x, tex_pos.y * SH_SIZE + j); + ivec4 average = imageLoad(lightprobe_average_parent_texture, src_pos); + vec4 value = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + average_light[j] += value * weight; + } + + total_weight += weight; + } + + if (total_weight > 0.0) { + total_weight = 1.0 / total_weight; + } + //store the averaged values everywhere + + for (int i = 0; i < SH_SIZE; i++) { + ivec4 ivalue = clamp(ivec4(average_light[i] * total_weight * float(1 << HISTORY_BITS)), ivec4(-32768), ivec4(32767)); //clamp to 16 bits, so higher values don't break average + // copy from history texture + ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, 0); + for (uint j = 0; j < params.history_size; j++) { + dst_pos.z = int(j); + imageStore(lightprobe_history_scroll_texture, dst_pos, ivalue); + } + + ivalue *= int(params.history_size); //average needs to have all history added up + imageStore(lightprobe_average_scroll_texture, dst_pos.xy, ivalue); + } + + } else { + //scroll at the edge of the highest cascade, just copy what is there, + //since its the closest we have anyway + + for (uint j = 0; j < params.history_size; j++) { + ivec2 tex_pos; + tex_pos = probe_cell.xy; + tex_pos.x += probe_cell.z * int(params.probe_axis_size); + + for (int i = 0; i < SH_SIZE; i++) { + // copy from history texture + ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); + ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); + ivec4 value = imageLoad(lightprobe_history_texture, dst_pos); + imageStore(lightprobe_history_scroll_texture, dst_pos, value); + } + } + + for (int i = 0; i < SH_SIZE; i++) { + // copy from average texture + ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); + ivec4 average = imageLoad(lightprobe_average_texture, spos); + imageStore(lightprobe_average_scroll_texture, spos, average); + } + } + +#endif + +#ifdef MODE_SCROLL_STORE + + //do not update probe texture, as these will be updated later + + for (uint j = 0; j < params.history_size; j++) { + for (int i = 0; i < SH_SIZE; i++) { + // copy from history texture + ivec3 spos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); + ivec4 value = imageLoad(lightprobe_history_scroll_texture, spos); + imageStore(lightprobe_history_texture, spos, value); + } + } + + for (int i = 0; i < SH_SIZE; i++) { + // copy from average texture + ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); + ivec4 average = imageLoad(lightprobe_average_scroll_texture, spos); + imageStore(lightprobe_average_texture, spos, average); + } + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl new file mode 100644 index 0000000000..916c60ac89 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl @@ -0,0 +1,1056 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#ifdef MODE_JUMPFLOOD_OPTIMIZED +#define GROUP_SIZE 8 + +layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = GROUP_SIZE) in; + +#elif defined(MODE_OCCLUSION) || defined(MODE_SCROLL) +//buffer layout +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#else +//grid layout +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#endif + +#if defined(MODE_INITIALIZE_JUMP_FLOOD) || defined(MODE_INITIALIZE_JUMP_FLOOD_HALF) +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; +#endif + +#ifdef MODE_UPSCALE_JUMP_FLOOD +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(rgba8ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_positions_half; +layout(rgba8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_positions; +#endif + +#if defined(MODE_JUMPFLOOD) || defined(MODE_JUMPFLOOD_OPTIMIZED) +layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; +layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; +#endif + +#ifdef MODE_JUMPFLOOD_OPTIMIZED + +shared uvec4 group_positions[(GROUP_SIZE + 2) * (GROUP_SIZE + 2) * (GROUP_SIZE + 2)]; //4x4x4 with margins + +void group_store(ivec3 p_pos, uvec4 p_value) { + uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); + group_positions[offset] = p_value; +} + +uvec4 group_load(ivec3 p_pos) { + uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); + return group_positions[offset]; +} + +#endif + +#ifdef MODE_OCCLUSION + +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(r8, set = 0, binding = 2) uniform restrict image3D dst_occlusion[8]; +layout(r32ui, set = 0, binding = 3) uniform restrict readonly uimage3D src_facing; + +const uvec2 group_size_offset[11] = uvec2[](uvec2(1, 0), uvec2(3, 1), uvec2(6, 4), uvec2(10, 10), uvec2(15, 20), uvec2(21, 35), uvec2(28, 56), uvec2(36, 84), uvec2(42, 120), uvec2(46, 162), uvec2(48, 208)); +const uint group_pos[256] = uint[](0, + 65536, 256, 1, + 131072, 65792, 512, 65537, 257, 2, + 196608, 131328, 66048, 768, 131073, 65793, 513, 65538, 258, 3, + 262144, 196864, 131584, 66304, 1024, 196609, 131329, 66049, 769, 131074, 65794, 514, 65539, 259, 4, + 327680, 262400, 197120, 131840, 66560, 1280, 262145, 196865, 131585, 66305, 1025, 196610, 131330, 66050, 770, 131075, 65795, 515, 65540, 260, 5, + 393216, 327936, 262656, 197376, 132096, 66816, 1536, 327681, 262401, 197121, 131841, 66561, 1281, 262146, 196866, 131586, 66306, 1026, 196611, 131331, 66051, 771, 131076, 65796, 516, 65541, 261, 6, + 458752, 393472, 328192, 262912, 197632, 132352, 67072, 1792, 393217, 327937, 262657, 197377, 132097, 66817, 1537, 327682, 262402, 197122, 131842, 66562, 1282, 262147, 196867, 131587, 66307, 1027, 196612, 131332, 66052, 772, 131077, 65797, 517, 65542, 262, 7, + 459008, 393728, 328448, 263168, 197888, 132608, 67328, 458753, 393473, 328193, 262913, 197633, 132353, 67073, 1793, 393218, 327938, 262658, 197378, 132098, 66818, 1538, 327683, 262403, 197123, 131843, 66563, 1283, 262148, 196868, 131588, 66308, 1028, 196613, 131333, 66053, 773, 131078, 65798, 518, 65543, 263, + 459264, 393984, 328704, 263424, 198144, 132864, 459009, 393729, 328449, 263169, 197889, 132609, 67329, 458754, 393474, 328194, 262914, 197634, 132354, 67074, 1794, 393219, 327939, 262659, 197379, 132099, 66819, 1539, 327684, 262404, 197124, 131844, 66564, 1284, 262149, 196869, 131589, 66309, 1029, 196614, 131334, 66054, 774, 131079, 65799, 519, + 459520, 394240, 328960, 263680, 198400, 459265, 393985, 328705, 263425, 198145, 132865, 459010, 393730, 328450, 263170, 197890, 132610, 67330, 458755, 393475, 328195, 262915, 197635, 132355, 67075, 1795, 393220, 327940, 262660, 197380, 132100, 66820, 1540, 327685, 262405, 197125, 131845, 66565, 1285, 262150, 196870, 131590, 66310, 1030, 196615, 131335, 66055, 775); + +shared uint occlusion_facing[((OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2)) / 4]; + +uint get_facing(ivec3 p_pos) { + uint ofs = uint(p_pos.z * OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2 + p_pos.y * OCCLUSION_SIZE * 2 + p_pos.x); + uint v = occlusion_facing[ofs / 4]; + return (v >> ((ofs % 4) * 8)) & 0xFF; +} + +#endif + +#ifdef MODE_STORE + +layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; +layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_albedo; +layout(r8, set = 0, binding = 3) uniform restrict readonly image3D src_occlusion[8]; +layout(r32ui, set = 0, binding = 4) uniform restrict readonly uimage3D src_light; +layout(r32ui, set = 0, binding = 5) uniform restrict readonly uimage3D src_light_aniso; +layout(r32ui, set = 0, binding = 6) uniform restrict readonly uimage3D src_facing; + +layout(r8, set = 0, binding = 7) uniform restrict writeonly image3D dst_sdf; +layout(r16ui, set = 0, binding = 8) uniform restrict writeonly uimage3D dst_occlusion; + +layout(set = 0, binding = 10, std430) restrict buffer DispatchData { + uint x; + uint y; + uint z; + uint total_count; +} +dispatch_data; + +struct ProcessVoxel { + uint position; //xyz 7 bit packed, extra 11 bits for neigbours + uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours + uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours + //total neighbours: 26 +}; + +layout(set = 0, binding = 11, std430) restrict buffer writeonly ProcessVoxels { + ProcessVoxel data[]; +} +dst_process_voxels; + +shared ProcessVoxel store_positions[4 * 4 * 4]; +shared uint store_position_count; +shared uint store_from_index; +#endif + +#ifdef MODE_SCROLL + +layout(r16ui, set = 0, binding = 1) uniform restrict writeonly uimage3D dst_albedo; +layout(r32ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_facing; +layout(r32ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_light; +layout(r32ui, set = 0, binding = 4) uniform restrict writeonly uimage3D dst_light_aniso; + +layout(set = 0, binding = 5, std430) restrict buffer readonly DispatchData { + uint x; + uint y; + uint z; + uint total_count; +} +dispatch_data; + +struct ProcessVoxel { + uint position; //xyz 7 bit packed, extra 11 bits for neigbours + uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours + uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours + //total neighbours: 26 +}; + +layout(set = 0, binding = 6, std430) restrict buffer readonly ProcessVoxels { + ProcessVoxel data[]; +} +src_process_voxels; + +#endif + +#ifdef MODE_SCROLL_OCCLUSION + +layout(r8, set = 0, binding = 1) uniform restrict image3D dst_occlusion[8]; +layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_occlusion; + +#endif + +layout(push_constant, binding = 0, std430) uniform Params { + ivec3 scroll; + + int grid_size; + + ivec3 probe_offset; + int step_size; + + bool half_size; + uint occlusion_index; + int cascade; + uint pad; +} +params; + +void main() { +#ifdef MODE_SCROLL + + // Pixel being shaded + int index = int(gl_GlobalInvocationID.x); + if (index >= dispatch_data.total_count) { //too big + return; + } + + ivec3 read_pos = (ivec3(src_process_voxels.data[index].position) >> ivec3(0, 7, 14)) & ivec3(0x7F); + ivec3 write_pos = read_pos + params.scroll; + + if (any(lessThan(write_pos, ivec3(0))) || any(greaterThanEqual(write_pos, ivec3(params.grid_size)))) { + return; //fits outside the 3D texture, dont do anything + } + + uint albedo = ((src_process_voxels.data[index].albedo & 0x7FFF) << 1) | 1; //add solid bit + imageStore(dst_albedo, write_pos, uvec4(albedo)); + + uint facing = (src_process_voxels.data[index].albedo >> 15) & 0x3F; //6 anisotropic facing bits + imageStore(dst_facing, write_pos, uvec4(facing)); + + uint light = src_process_voxels.data[index].light & 0x3fffffff; //30 bits of RGBE8985 + imageStore(dst_light, write_pos, uvec4(light)); + + uint light_aniso = src_process_voxels.data[index].light_aniso & 0x3fffffff; //30 bits of 6 anisotropic 5 bits values + imageStore(dst_light_aniso, write_pos, uvec4(light_aniso)); + +#endif + +#ifdef MODE_SCROLL_OCCLUSION + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + if (any(greaterThanEqual(pos, ivec3(params.grid_size) - abs(params.scroll)))) { //too large, do nothing + return; + } + + ivec3 read_pos = pos + max(ivec3(0), -params.scroll); + ivec3 write_pos = pos + max(ivec3(0), params.scroll); + + read_pos.z += params.cascade * params.grid_size; + uint occlusion = imageLoad(src_occlusion, read_pos).r; + read_pos.x += params.grid_size; + occlusion |= imageLoad(src_occlusion, read_pos).r << 16; + + const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); + + for (uint i = 0; i < 8; i++) { + float o = float((occlusion >> occlusion_shift[i]) & 0xF) / 15.0; + imageStore(dst_occlusion[i], write_pos, vec4(o)); + } + +#endif + +#ifdef MODE_INITIALIZE_JUMP_FLOOD + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + uint c = imageLoad(src_color, pos).r; + uvec4 v; + if (bool(c & 0x1)) { + //bit set means this is solid + v.xyz = uvec3(pos); + v.w = 255; //not zero means used + } else { + v.xyz = uvec3(0); + v.w = 0; // zero means unused + } + + imageStore(dst_positions, pos, v); +#endif + +#ifdef MODE_INITIALIZE_JUMP_FLOOD_HALF + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + ivec3 base_pos = pos * 2; + + //since we store in half size, lets kind of randomize what we store, so + //the half size jump flood has a bit better chance to find something + uvec4 closest[8]; + int closest_count = 0; + + for (uint i = 0; i < 8; i++) { + ivec3 src_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + uint c = imageLoad(src_color, src_pos).r; + if (bool(c & 1)) { + uvec4 v = uvec4(uvec3(src_pos), 255); + closest[closest_count] = v; + closest_count++; + } + } + + if (closest_count == 0) { + imageStore(dst_positions, pos, uvec4(0)); + } else { + ivec3 indexv = (pos & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + int index = (indexv.x | indexv.y | indexv.z) % closest_count; + imageStore(dst_positions, pos, closest[index]); + } + +#endif + +#ifdef MODE_JUMPFLOOD + + //regular jumpflood, efficient for large steps, inefficient for small steps + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + vec3 posf = vec3(pos); + + if (params.half_size) { + posf = posf * 2.0 + 0.5; + } + + uvec4 p = imageLoad(src_positions, pos); + + if (!params.half_size && p == uvec4(uvec3(pos), 255)) { + imageStore(dst_positions, pos, p); + return; //points to itself and valid, nothing better can be done, just pass + } + + float p_dist; + + if (p.w != 0) { + p_dist = distance(posf, vec3(p.xyz)); + } else { + p_dist = 0.0; //should not matter + } + + const uint offset_count = 26; + const ivec3 offsets[offset_count] = ivec3[]( + ivec3(-1, -1, -1), + ivec3(-1, -1, 0), + ivec3(-1, -1, 1), + ivec3(-1, 0, -1), + ivec3(-1, 0, 0), + ivec3(-1, 0, 1), + ivec3(-1, 1, -1), + ivec3(-1, 1, 0), + ivec3(-1, 1, 1), + ivec3(0, -1, -1), + ivec3(0, -1, 0), + ivec3(0, -1, 1), + ivec3(0, 0, -1), + ivec3(0, 0, 1), + ivec3(0, 1, -1), + ivec3(0, 1, 0), + ivec3(0, 1, 1), + ivec3(1, -1, -1), + ivec3(1, -1, 0), + ivec3(1, -1, 1), + ivec3(1, 0, -1), + ivec3(1, 0, 0), + ivec3(1, 0, 1), + ivec3(1, 1, -1), + ivec3(1, 1, 0), + ivec3(1, 1, 1)); + + for (uint i = 0; i < offset_count; i++) { + ivec3 ofs = pos + offsets[i] * params.step_size; + if (any(lessThan(ofs, ivec3(0))) || any(greaterThanEqual(ofs, ivec3(params.grid_size)))) { + continue; + } + uvec4 q = imageLoad(src_positions, ofs); + + if (q.w == 0) { + continue; //was not initialized yet, ignore + } + + float q_dist = distance(posf, vec3(q.xyz)); + if (p.w == 0 || q_dist < p_dist) { + p = q; //just replace because current is unused + p_dist = q_dist; + } + } + + imageStore(dst_positions, pos, p); +#endif + +#ifdef MODE_JUMPFLOOD_OPTIMIZED + //optimized version using shared compute memory + + ivec3 group_offset = ivec3(gl_WorkGroupID.xyz) % params.step_size; + ivec3 group_pos = group_offset + (ivec3(gl_WorkGroupID.xyz) / params.step_size) * ivec3(GROUP_SIZE * params.step_size); + + //load data into local group memory + + if (all(lessThan(ivec3(gl_LocalInvocationID.xyz), ivec3((GROUP_SIZE + 2) / 2)))) { + //use this thread for loading, this method uses less threads for this but its simpler and less divergent + ivec3 base_pos = ivec3(gl_LocalInvocationID.xyz) * 2; + for (uint i = 0; i < 8; i++) { + ivec3 load_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + ivec3 load_global_pos = group_pos + (load_pos - ivec3(1)) * params.step_size; + uvec4 q; + if (all(greaterThanEqual(load_global_pos, ivec3(0))) && all(lessThan(load_global_pos, ivec3(params.grid_size)))) { + q = imageLoad(src_positions, load_global_pos); + } else { + q = uvec4(0); //unused + } + + group_store(load_pos, q); + } + } + + ivec3 global_pos = group_pos + ivec3(gl_LocalInvocationID.xyz) * params.step_size; + + if (any(lessThan(global_pos, ivec3(0))) || any(greaterThanEqual(global_pos, ivec3(params.grid_size)))) { + return; //do nothing else, end here because outside range + } + + //sync + groupMemoryBarrier(); + barrier(); + + ivec3 local_pos = ivec3(gl_LocalInvocationID.xyz) + ivec3(1); + + const uint offset_count = 27; + const ivec3 offsets[offset_count] = ivec3[]( + ivec3(-1, -1, -1), + ivec3(-1, -1, 0), + ivec3(-1, -1, 1), + ivec3(-1, 0, -1), + ivec3(-1, 0, 0), + ivec3(-1, 0, 1), + ivec3(-1, 1, -1), + ivec3(-1, 1, 0), + ivec3(-1, 1, 1), + ivec3(0, -1, -1), + ivec3(0, -1, 0), + ivec3(0, -1, 1), + ivec3(0, 0, -1), + ivec3(0, 0, 0), + ivec3(0, 0, 1), + ivec3(0, 1, -1), + ivec3(0, 1, 0), + ivec3(0, 1, 1), + ivec3(1, -1, -1), + ivec3(1, -1, 0), + ivec3(1, -1, 1), + ivec3(1, 0, -1), + ivec3(1, 0, 0), + ivec3(1, 0, 1), + ivec3(1, 1, -1), + ivec3(1, 1, 0), + ivec3(1, 1, 1)); + + //only makes sense if point is inside screen + uvec4 closest = uvec4(0); + float closest_dist = 0.0; + + vec3 posf = vec3(global_pos); + + if (params.half_size) { + posf = posf * 2.0 + 0.5; + } + + for (uint i = 0; i < offset_count; i++) { + uvec4 point = group_load(local_pos + offsets[i]); + + if (point.w == 0) { + continue; //was not initialized yet, ignore + } + + float dist = distance(posf, vec3(point.xyz)); + if (closest.w == 0 || dist < closest_dist) { + closest = point; + closest_dist = dist; + } + } + + imageStore(dst_positions, global_pos, closest); + +#endif + +#ifdef MODE_UPSCALE_JUMP_FLOOD + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + uint c = imageLoad(src_color, pos).r; + uvec4 v; + if (bool(c & 1)) { + //bit set means this is solid + v.xyz = uvec3(pos); + v.w = 255; //not zero means used + } else { + v = imageLoad(src_positions_half, pos >> 1); + float d = length(vec3(ivec3(v.xyz) - pos)); + + ivec3 vbase = ivec3(v.xyz - (v.xyz & uvec3(1))); + + //search around if there is a better candidate from the same block + for (int i = 0; i < 8; i++) { + ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + ivec3 p = vbase + bits; + + float d2 = length(vec3(p - pos)); + if (d2 < d) { //check valid distance before test so we avoid a read + uint c2 = imageLoad(src_color, p).r; + if (bool(c2 & 1)) { + v.xyz = uvec3(p); + d = d2; + } + } + } + + //could validate better position.. + } + + imageStore(dst_positions, pos, v); + +#endif + +#ifdef MODE_OCCLUSION + + uint invocation_idx = uint(gl_LocalInvocationID.x); + ivec3 region = ivec3(gl_WorkGroupID); + + ivec3 region_offset = -ivec3(OCCLUSION_SIZE); + region_offset += region * OCCLUSION_SIZE * 2; + region_offset += params.probe_offset * OCCLUSION_SIZE; + + if (params.scroll != ivec3(0)) { + //validate scroll region + ivec3 region_offset_to = region_offset + ivec3(OCCLUSION_SIZE * 2); + uvec3 scroll_mask = uvec3(notEqual(params.scroll, ivec3(0))); //save which axes acre scrolling + ivec3 scroll_from = mix(ivec3(0), ivec3(params.grid_size) + params.scroll, lessThan(params.scroll, ivec3(0))); + ivec3 scroll_to = mix(ivec3(params.grid_size), params.scroll, greaterThan(params.scroll, ivec3(0))); + + if ((uvec3(lessThanEqual(region_offset_to, scroll_from)) | uvec3(greaterThanEqual(region_offset, scroll_to))) * scroll_mask == scroll_mask) { //all axes that scroll are out, exit + return; //region outside scroll bounds, quit + } + } + +#define OCC_HALF_SIZE (OCCLUSION_SIZE / 2) + + ivec3 local_ofs = ivec3(uvec3(invocation_idx % OCC_HALF_SIZE, (invocation_idx % (OCC_HALF_SIZE * OCC_HALF_SIZE)) / OCC_HALF_SIZE, invocation_idx / (OCC_HALF_SIZE * OCC_HALF_SIZE))) * 4; + + /* for(int i=0;i<64;i++) { + ivec3 offset = region_offset + local_ofs + ((ivec3(i) >> ivec3(0,2,4)) & ivec3(3,3,3)); + uint facig = + if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) {*/ + + for (int i = 0; i < 16; i++) { //skip x, so it can be packed + + ivec3 offset = local_ofs + ((ivec3(i * 4) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); + + uint facing_pack = 0; + for (int j = 0; j < 4; j++) { + ivec3 foffset = region_offset + offset + ivec3(j, 0, 0); + if (all(greaterThanEqual(foffset, ivec3(0))) && all(lessThan(foffset, ivec3(params.grid_size)))) { + uint f = imageLoad(src_facing, foffset).r; + facing_pack |= f << (j * 8); + } + } + + occlusion_facing[(offset.z * (OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2) + offset.y * (OCCLUSION_SIZE * 2) + offset.x) / 4] = facing_pack; + } + + //sync occlusion saved + groupMemoryBarrier(); + barrier(); + + //process occlusion + +#define OCC_STEPS (OCCLUSION_SIZE * 3 - 2) +#define OCC_HALF_STEPS (OCC_STEPS / 2) + + for (int step = 0; step < OCC_STEPS; step++) { + bool shrink = step >= OCC_HALF_STEPS; + int occ_step = shrink ? OCC_HALF_STEPS - (step - OCC_HALF_STEPS) - 1 : step; + + if (invocation_idx < group_size_offset[occ_step].x) { + uint pv = group_pos[group_size_offset[occ_step].y + invocation_idx]; + ivec3 proc_abs = (ivec3(int(pv)) >> ivec3(0, 8, 16)) & ivec3(0xFF); + + if (shrink) { + proc_abs = ivec3(OCCLUSION_SIZE) - proc_abs - ivec3(1); + } + + for (int i = 0; i < 8; i++) { + ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + ivec3 proc_sign = bits * 2 - 1; + ivec3 local_offset = ivec3(OCCLUSION_SIZE) + proc_abs * proc_sign - (ivec3(1) - bits); + ivec3 offset = local_offset + region_offset; + if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { + float occ; + + uint facing = get_facing(local_offset); + + if (facing != 0) { //solid + occ = 0.0; + } else if (step == 0) { +#if 0 + occ = 0.0; + if (get_facing(local_offset - ivec3(proc_sign.x,0,0))==0) { + occ+=1.0; + } + if (get_facing(local_offset - ivec3(0,proc_sign.y,0))==0) { + occ+=1.0; + } + if (get_facing(local_offset - ivec3(0,0,proc_sign.z))==0) { + occ+=1.0; + } + /* + if (get_facing(local_offset - proc_sign)==0) { + occ+=1.0; + }*/ + + occ/=3.0; +#endif + occ = 1.0; + + } else { + ivec3 read_dir = -proc_sign; + + ivec3 major_axis; + if (proc_abs.x < proc_abs.y) { + if (proc_abs.z < proc_abs.y) { + major_axis = ivec3(0, 1, 0); + } else { + major_axis = ivec3(0, 0, 1); + } + } else { + if (proc_abs.z < proc_abs.x) { + major_axis = ivec3(1, 0, 0); + } else { + major_axis = ivec3(0, 0, 1); + } + } + + float avg = 0.0; + occ = 0.0; + + ivec3 read_x = offset + ivec3(read_dir.x, 0, 0) + (proc_abs.x == 0 ? major_axis * read_dir : ivec3(0)); + ivec3 read_y = offset + ivec3(0, read_dir.y, 0) + (proc_abs.y == 0 ? major_axis * read_dir : ivec3(0)); + ivec3 read_z = offset + ivec3(0, 0, read_dir.z) + (proc_abs.z == 0 ? major_axis * read_dir : ivec3(0)); + + uint facing_x = get_facing(read_x - region_offset); + if (facing_x == 0) { + if (all(greaterThanEqual(read_x, ivec3(0))) && all(lessThan(read_x, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_x).r; + avg += 1.0; + } + } else { + if (proc_abs.x != 0) { //do not occlude from voxels in the opposite octant + avg += 1.0; + } + } + + uint facing_y = get_facing(read_y - region_offset); + if (facing_y == 0) { + if (all(greaterThanEqual(read_y, ivec3(0))) && all(lessThan(read_y, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_y).r; + avg += 1.0; + } + } else { + if (proc_abs.y != 0) { + avg += 1.0; + } + } + + uint facing_z = get_facing(read_z - region_offset); + if (facing_z == 0) { + if (all(greaterThanEqual(read_z, ivec3(0))) && all(lessThan(read_z, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_z).r; + avg += 1.0; + } + } else { + if (proc_abs.z != 0) { + avg += 1.0; + } + } + + if (avg > 0.0) { + occ /= avg; + } + } + + imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); + } + } + } + + groupMemoryBarrier(); + barrier(); + } +#if 1 + //bias solid voxels away + + for (int i = 0; i < 64; i++) { + ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); + ivec3 offset = region_offset + local_offset; + + if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { + uint facing = get_facing(local_offset); + + if (facing != 0) { + //only work on solids + + ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE); + proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); + + float avg = 0.0; + float occ = 0.0; + + ivec3 read_dir = -sign(proc_pos); + ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); + ivec3 read_dir_y = ivec3(0, read_dir.y, 0); + ivec3 read_dir_z = ivec3(0, 0, read_dir.z); + //solid +#if 0 + + uvec3 facing_pos_base = (uvec3(facing) >> uvec3(0,1,2)) & uvec3(1,1,1); + uvec3 facing_neg_base = (uvec3(facing) >> uvec3(3,4,5)) & uvec3(1,1,1); + uvec3 facing_pos= facing_pos_base &((~facing_neg_base)&uvec3(1,1,1)); + uvec3 facing_neg= facing_neg_base &((~facing_pos_base)&uvec3(1,1,1)); +#else + uvec3 facing_pos = (uvec3(facing) >> uvec3(0, 1, 2)) & uvec3(1, 1, 1); + uvec3 facing_neg = (uvec3(facing) >> uvec3(3, 4, 5)) & uvec3(1, 1, 1); +#endif + bvec3 read_valid = bvec3(mix(facing_neg, facing_pos, greaterThan(read_dir, ivec3(0)))); + + //sides + if (read_valid.x) { + ivec3 read_offset = local_offset + read_dir_x; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (read_valid.y) { + ivec3 read_offset = local_offset + read_dir_y; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (read_valid.z) { + ivec3 read_offset = local_offset + read_dir_z; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + //adjacents + + if (all(read_valid.yz)) { + ivec3 read_offset = local_offset + read_dir_y + read_dir_z; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (all(read_valid.xz)) { + ivec3 read_offset = local_offset + read_dir_x + read_dir_z; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (all(read_valid.xy)) { + ivec3 read_offset = local_offset + read_dir_x + read_dir_y; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + //diagonal + + if (all(read_valid)) { + ivec3 read_offset = local_offset + read_dir; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (avg > 0.0) { + occ /= avg; + } + + imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); + } + } + } + +#endif + +#if 1 + groupMemoryBarrier(); + barrier(); + + for (int i = 0; i < 64; i++) { + ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); + ivec3 offset = region_offset + local_offset; + + if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { + uint facing = get_facing(local_offset); + + if (facing == 0) { + ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE); + proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); + + ivec3 proc_abs = abs(proc_pos); + + ivec3 read_dir = sign(proc_pos); //opposite direction + ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); + ivec3 read_dir_y = ivec3(0, read_dir.y, 0); + ivec3 read_dir_z = ivec3(0, 0, read_dir.z); + //solid + uvec3 read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match positive with negative normals + uvec3 block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match positive with negative normals + + block_mask = uvec3(0); + + float visible = 0.0; + float occlude_total = 0.0; + + if (proc_abs.x < OCCLUSION_SIZE) { + ivec3 read_offset = local_offset + read_dir_x; + uint x_mask = get_facing(read_offset); + if (x_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.y < OCCLUSION_SIZE) { + ivec3 read_offset = local_offset + read_dir_y; + uint y_mask = get_facing(read_offset); + if (y_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.z < OCCLUSION_SIZE) { + ivec3 read_offset = local_offset + read_dir_z; + uint z_mask = get_facing(read_offset); + if (z_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { + visible += 1.0; + } + } + } + } + + //if near the cartesian plane, test in opposite direction too + + read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match negative with positive normals + block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match negative with positive normals + block_mask = uvec3(0); + + if (proc_abs.x == 1) { + ivec3 read_offset = local_offset - read_dir_x; + uint x_mask = get_facing(read_offset); + if (x_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.y == 1) { + ivec3 read_offset = local_offset - read_dir_y; + uint y_mask = get_facing(read_offset); + if (y_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.z == 1) { + ivec3 read_offset = local_offset - read_dir_z; + uint z_mask = get_facing(read_offset); + if (z_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { + visible += 1.0; + } + } + } + } + + if (occlude_total > 0.0) { + float occ = imageLoad(dst_occlusion[params.occlusion_index], offset).r; + occ *= visible / occlude_total; + imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); + } + } + } + } + +#endif + + /* + for(int i=0;i<8;i++) { + ivec3 local_offset = local_pos + ((ivec3(i) >> ivec3(2,1,0)) & ivec3(1,1,1)) * OCCLUSION_SIZE; + ivec3 offset = local_offset - ivec3(OCCLUSION_SIZE); //looking around probe, so starts negative + offset += region * OCCLUSION_SIZE * 2; //offset by region + offset += params.probe_offset * OCCLUSION_SIZE; // offset by probe offset + if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) { + imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_data[ to_linear(local_offset) ] )); + //imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_solid[ to_linear(local_offset) ] )); + } + } +*/ + +#endif + +#ifdef MODE_STORE + + ivec3 local = ivec3(gl_LocalInvocationID.xyz); + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + // store SDF + uvec4 p = imageLoad(src_positions, pos); + + bool solid = false; + float d; + if (ivec3(p.xyz) == pos) { + //solid block + d = 0; + solid = true; + } else { + //distance block + d = 1.0 + length(vec3(p.xyz) - vec3(pos)); + } + + d /= 255.0; + + imageStore(dst_sdf, pos, vec4(d)); + + // STORE OCCLUSION + + uint occlusion = 0; + const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); + for (int i = 0; i < 8; i++) { + float occ = imageLoad(src_occlusion[i], pos).r; + occlusion |= uint(clamp(occ * 15.0, 0.0, 15.0)) << occlusion_shift[i]; + } + { + ivec3 occ_pos = pos; + occ_pos.z += params.cascade * params.grid_size; + imageStore(dst_occlusion, occ_pos, uvec4(occlusion & 0xFFFF)); + occ_pos.x += params.grid_size; + imageStore(dst_occlusion, occ_pos, uvec4(occlusion >> 16)); + } + + // STORE POSITIONS + + if (local == ivec3(0)) { + store_position_count = 0; //base one stores as zero, the others wait + } + + groupMemoryBarrier(); + barrier(); + + if (solid) { + uint index = atomicAdd(store_position_count, 1); + // At least do the conversion work in parallel + store_positions[index].position = uint(pos.x | (pos.y << 7) | (pos.z << 14)); + + //see around which voxels point to this one, add them to the list + uint bit_index = 0; + uint neighbour_bits = 0; + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + if (i == 0 && j == 0 && k == 0) { + continue; + } + ivec3 npos = pos + ivec3(i, j, k); + if (all(greaterThanEqual(npos, ivec3(0))) && all(lessThan(npos, ivec3(params.grid_size)))) { + p = imageLoad(src_positions, npos); + if (ivec3(p.xyz) == pos) { + neighbour_bits |= (1 << bit_index); + } + } + bit_index++; + } + } + } + + uint rgb = imageLoad(src_albedo, pos).r; + uint facing = imageLoad(src_facing, pos).r; + + store_positions[index].albedo = rgb >> 1; //store as it comes (555) to avoid precision loss (and move away the alpha bit) + store_positions[index].albedo |= (facing & 0x3F) << 15; // store facing in bits 15-21 + + store_positions[index].albedo |= neighbour_bits << 21; //store lower 11 bits of neighbours with remaining albedo + store_positions[index].position |= (neighbour_bits >> 11) << 21; //store 11 bits more of neighbours with position + + store_positions[index].light = imageLoad(src_light, pos).r; + store_positions[index].light_aniso = imageLoad(src_light_aniso, pos).r; + //add neighbours + store_positions[index].light |= (neighbour_bits >> 22) << 30; //store 2 bits more of neighbours with light + store_positions[index].light_aniso |= (neighbour_bits >> 24) << 30; //store 2 bits more of neighbours with aniso + } + + groupMemoryBarrier(); + barrier(); + + // global increment only once per group, to reduce pressure + + if (local == ivec3(0) && store_position_count > 0) { + store_from_index = atomicAdd(dispatch_data.total_count, store_position_count); + uint group_count = (store_from_index + store_position_count - 1) / 64 + 1; + atomicMax(dispatch_data.x, group_count); + } + + groupMemoryBarrier(); + barrier(); + + uint read_index = uint(local.z * 4 * 4 + local.y * 4 + local.x); + uint write_index = store_from_index + read_index; + + if (read_index < store_position_count) { + dst_process_voxels.data[write_index] = store_positions[read_index]; + } + + if (pos == ivec3(0)) { + //this thread clears y and z + dispatch_data.y = 1; + dispatch_data.z = 1; + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/skeleton.glsl b/servers/rendering/renderer_rd/shaders/skeleton.glsl new file mode 100644 index 0000000000..680d1045cd --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/skeleton.glsl @@ -0,0 +1,199 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer restrict writeonly DstVertexData { + uint data[]; +} +dst_vertices; + +layout(set = 0, binding = 2, std430) buffer restrict readonly BlendShapeWeights { + float data[]; +} +blend_shape_weights; + +layout(set = 1, binding = 0, std430) buffer restrict readonly SrcVertexData { + uint data[]; +} +src_vertices; + +layout(set = 1, binding = 1, std430) buffer restrict readonly BoneWeightData { + uint data[]; +} +src_bone_weights; + +layout(set = 1, binding = 2, std430) buffer restrict readonly BlendShapeData { + uint data[]; +} +src_blend_shapes; + +layout(set = 2, binding = 0, std430) buffer restrict readonly SkeletonData { + vec4 data[]; +} +bone_transforms; + +layout(push_constant, binding = 0, std430) uniform Params { + bool has_normal; + bool has_tangent; + bool has_skeleton; + bool has_blend_shape; + + uint vertex_count; + uint vertex_stride; + uint skin_stride; + uint skin_weight_offset; + + uint blend_shape_count; + bool normalized_blend_shapes; + uint pad0; + uint pad1; +} +params; + +vec4 decode_abgr_2_10_10_10(uint base) { + uvec4 abgr_2_10_10_10 = (uvec4(base) >> uvec4(0, 10, 20, 30)) & uvec4(0x3FF, 0x3FF, 0x3FF, 0x3); + return vec4(abgr_2_10_10_10) / vec4(1023.0, 1023.0, 1023.0, 3.0) * 2.0 - 1.0; +} + +uint encode_abgr_2_10_10_10(vec4 base) { + uvec4 abgr_2_10_10_10 = uvec4(clamp(ivec4((base * 0.5 + 0.5) * vec4(1023.0, 1023.0, 1023.0, 3.0)), ivec4(0), ivec4(0x3FF, 0x3FF, 0x3FF, 0x3))) << uvec4(0, 10, 20, 30); + return abgr_2_10_10_10.x | abgr_2_10_10_10.y | abgr_2_10_10_10.z | abgr_2_10_10_10.w; +} + +void main() { + uint index = gl_GlobalInvocationID.x; + if (index >= params.vertex_count) { + return; + } + + uint src_offset = index * params.vertex_stride; + +#ifdef MODE_2D + vec2 vertex = uintBitsToFloat(uvec2(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1])); +#else + vec3 vertex; + vec3 normal; + vec4 tangent; + + vertex = uintBitsToFloat(uvec3(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1], src_vertices.data[src_offset + 2])); + + src_offset += 3; + + if (params.has_normal) { + normal = decode_abgr_2_10_10_10(src_vertices.data[src_offset]).rgb; + src_offset++; + } + + if (params.has_tangent) { + tangent = decode_abgr_2_10_10_10(src_vertices.data[src_offset]); + } + + if (params.has_blend_shape) { + float blend_total = 0.0; + vec3 blend_vertex = vec3(0.0); + vec3 blend_normal = vec3(0.0); + vec3 blend_tangent = vec3(0.0); + + for (uint i = 0; i < params.blend_shape_count; i++) { + float w = blend_shape_weights.data[i]; + if (abs(w) > 0.0001) { + uint base_offset = (params.vertex_count * i + index) * params.vertex_stride; + + blend_vertex += uintBitsToFloat(uvec3(src_blend_shapes.data[base_offset + 0], src_blend_shapes.data[base_offset + 1], src_blend_shapes.data[base_offset + 2])) * w; + + base_offset += 3; + + if (params.has_normal) { + blend_normal += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb * w; + base_offset++; + } + + if (params.has_tangent) { + blend_tangent += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb; + } + + blend_total += w; + } + } + + if (params.normalized_blend_shapes) { + vertex = (1.0 - blend_total) * vertex; + normal = (1.0 - blend_total) * normal; + tangent.rgb = (1.0 - blend_total) * tangent.rgb; + } + + vertex += blend_vertex; + normal += normalize(normal + blend_normal); + tangent.rgb += normalize(tangent.rgb + blend_tangent); + } + + if (params.has_skeleton) { + uint skin_offset = params.skin_stride * index; + + uvec2 bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + uvec2 bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset + uvec2 bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3; + + skin_offset += params.skin_weight_offset; + + uvec2 weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + + vec2 weights_01 = unpackUnorm2x16(weights.x); + vec2 weights_23 = unpackUnorm2x16(weights.y); + + mat4 m = mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x; + m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y; + m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x; + m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y; + + if (params.skin_weight_offset == 4) { + //using 8 bones/weights + skin_offset = params.skin_stride * index + 2; + + bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset + bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3; + + skin_offset += params.skin_weight_offset; + + weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + + weights_01 = unpackUnorm2x16(weights.x); + weights_23 = unpackUnorm2x16(weights.y); + + m += mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x; + m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y; + m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x; + m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y; + } + + //reverse order because its transposed + vertex = (vec4(vertex, 1.0) * m).xyz; + normal = normalize((vec4(normal, 0.0) * m).xyz); + tangent.xyz = normalize((vec4(tangent.xyz, 0.0) * m).xyz); + } + + uint dst_offset = index * params.vertex_stride; + + uvec3 uvertex = floatBitsToUint(vertex); + dst_vertices.data[dst_offset + 0] = uvertex.x; + dst_vertices.data[dst_offset + 1] = uvertex.y; + dst_vertices.data[dst_offset + 2] = uvertex.z; + + dst_offset += 3; + + if (params.has_normal) { + dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(vec4(normal, 0.0)); + dst_offset++; + } + + if (params.has_tangent) { + dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(tangent); + } + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/sky.glsl b/servers/rendering/renderer_rd/shaders/sky.glsl new file mode 100644 index 0000000000..6c985e1f5c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sky.glsl @@ -0,0 +1,250 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) out vec2 uv_interp; + +layout(push_constant, binding = 1, std430) uniform Params { + mat3 orientation; + vec4 proj; + vec4 position_multiplier; + float time; +} +params; + +void main() { + vec2 base_arr[4] = vec2[](vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(1.0, -1.0)); + uv_interp = base_arr[gl_VertexIndex]; + gl_Position = vec4(uv_interp, 1.0, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +#define M_PI 3.14159265359 + +layout(location = 0) in vec2 uv_interp; + +layout(push_constant, binding = 1, std430) uniform Params { + mat3 orientation; + vec4 proj; + vec4 position_multiplier; + float time; //TODO consider adding vec2 screen res, and float radiance size +} +params; + +#define SAMPLER_NEAREST_CLAMP 0 +#define SAMPLER_LINEAR_CLAMP 1 +#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2 +#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5 +#define SAMPLER_NEAREST_REPEAT 6 +#define SAMPLER_LINEAR_REPEAT 7 +#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8 +#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 + +layout(set = 0, binding = 0) uniform sampler material_samplers[12]; + +layout(set = 0, binding = 1, std430) restrict readonly buffer GlobalVariableData { + vec4 data[]; +} +global_variables; + +layout(set = 0, binding = 2, std140) uniform SceneData { + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + + float fog_aerial_perspective; + + vec3 fog_light_color; + float fog_sun_scatter; + + bool fog_enabled; + float fog_density; + + float z_far; + uint directional_light_count; +} +scene_data; + +struct DirectionalLightData { + vec4 direction_energy; + vec4 color_size; + bool enabled; +}; + +layout(set = 0, binding = 3, std140) uniform DirectionalLights { + DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; +} + +directional_lights; + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = 1, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ + +MATERIAL_UNIFORMS + + /* clang-format on */ +} material; +#endif + +layout(set = 2, binding = 0) uniform textureCube radiance; +#ifdef USE_CUBEMAP_PASS +layout(set = 2, binding = 1) uniform textureCube half_res; +layout(set = 2, binding = 2) uniform textureCube quarter_res; +#else +layout(set = 2, binding = 1) uniform texture2D half_res; +layout(set = 2, binding = 2) uniform texture2D quarter_res; +#endif + +layout(set = 3, binding = 0) uniform texture3D volumetric_fog_texture; + +#ifdef USE_CUBEMAP_PASS +#define AT_CUBEMAP_PASS true +#else +#define AT_CUBEMAP_PASS false +#endif + +#ifdef USE_HALF_RES_PASS +#define AT_HALF_RES_PASS true +#else +#define AT_HALF_RES_PASS false +#endif + +#ifdef USE_QUARTER_RES_PASS +#define AT_QUARTER_RES_PASS true +#else +#define AT_QUARTER_RES_PASS false +#endif + +/* clang-format off */ + +FRAGMENT_SHADER_GLOBALS + +/* clang-format on */ + +layout(location = 0) out vec4 frag_color; + +vec4 volumetric_fog_process(vec2 screen_uv) { + vec3 fog_pos = vec3(screen_uv, 1.0); + + return texture(sampler3D(volumetric_fog_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), fog_pos); +} + +vec4 fog_process(vec3 view, vec3 sky_color) { + vec3 fog_color = mix(scene_data.fog_light_color, sky_color, scene_data.fog_aerial_perspective); + + if (scene_data.fog_sun_scatter > 0.001) { + vec4 sun_scatter = vec4(0.0); + float sun_total = 0.0; + for (uint i = 0; i < scene_data.directional_light_count; i++) { + vec3 light_color = directional_lights.data[i].color_size.xyz * directional_lights.data[i].direction_energy.w; + float light_amount = pow(max(dot(view, directional_lights.data[i].direction_energy.xyz), 0.0), 8.0); + fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + } + } + + float fog_amount = clamp(1.0 - exp(-scene_data.z_far * scene_data.fog_density), 0.0, 1.0); + + return vec4(fog_color, fog_amount); +} + +void main() { + vec3 cube_normal; + cube_normal.z = -1.0; + cube_normal.x = (cube_normal.z * (-uv_interp.x - params.proj.x)) / params.proj.y; + cube_normal.y = -(cube_normal.z * (-uv_interp.y - params.proj.z)) / params.proj.w; + cube_normal = mat3(params.orientation) * cube_normal; + cube_normal.z = -cube_normal.z; + cube_normal = normalize(cube_normal); + + vec2 uv = uv_interp * 0.5 + 0.5; + + vec2 panorama_coords = vec2(atan(cube_normal.x, cube_normal.z), acos(cube_normal.y)); + + if (panorama_coords.x < 0.0) { + panorama_coords.x += M_PI * 2.0; + } + + panorama_coords /= vec2(M_PI * 2.0, M_PI); + + vec3 color = vec3(0.0, 0.0, 0.0); + float alpha = 1.0; // Only available to subpasses + vec4 half_res_color = vec4(1.0); + vec4 quarter_res_color = vec4(1.0); + vec4 custom_fog = vec4(0.0); + +#ifdef USE_CUBEMAP_PASS + vec3 inverted_cube_normal = cube_normal; + inverted_cube_normal.z *= -1.0; +#ifdef USES_HALF_RES_COLOR + half_res_color = texture(samplerCube(half_res, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), inverted_cube_normal); +#endif +#ifdef USES_QUARTER_RES_COLOR + quarter_res_color = texture(samplerCube(quarter_res, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), inverted_cube_normal); +#endif +#else +#ifdef USES_HALF_RES_COLOR + half_res_color = textureLod(sampler2D(half_res, material_samplers[SAMPLER_LINEAR_CLAMP]), uv, 0.0); +#endif +#ifdef USES_QUARTER_RES_COLOR + quarter_res_color = textureLod(sampler2D(quarter_res, material_samplers[SAMPLER_LINEAR_CLAMP]), uv, 0.0); +#endif +#endif + +// unused, just here to make our compiler happy, make sure we don't execute any light code the user adds in.. +#ifndef REALLYINCLUDETHIS + { + /* clang-format off */ + +LIGHT_SHADER_CODE + + /* clang-format on */ + } +#endif + { + /* clang-format off */ + +FRAGMENT_SHADER_CODE + + /* clang-format on */ + } + + frag_color.rgb = color * params.position_multiplier.w; + frag_color.a = alpha; + +#if !defined(DISABLE_FOG) && !defined(USE_CUBEMAP_PASS) + + // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. + if (scene_data.fog_enabled) { + vec4 fog = fog_process(cube_normal, frag_color.rgb); + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + } + + if (scene_data.volumetric_fog_enabled) { + vec4 fog = volumetric_fog_process(uv); + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + } + + if (custom_fog.a > 0.0) { + frag_color.rgb = mix(frag_color.rgb, custom_fog.rgb, custom_fog.a); + } + +#endif // DISABLE_FOG + + // Blending is disabled for Sky, so alpha doesn't blend + // alpha is used for subsurface scattering so make sure it doesn't get applied to Sky + if (!AT_CUBEMAP_PASS && !AT_HALF_RES_PASS && !AT_QUARTER_RES_PASS) { + frag_color.a = 0.0; + } +} diff --git a/servers/rendering/renderer_rd/shaders/sort.glsl b/servers/rendering/renderer_rd/shaders/sort.glsl new file mode 100644 index 0000000000..e5ebb9c64b --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/sort.glsl @@ -0,0 +1,203 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +// Original version here: +// https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11/blob/master/gpuparticles11/src/Shaders + +// +// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#define SORT_SIZE 512 +#define NUM_THREADS (SORT_SIZE / 2) +#define INVERSION (16 * 2 + 8 * 3) +#define ITERATIONS 1 + +layout(local_size_x = NUM_THREADS, local_size_y = 1, local_size_z = 1) in; + +#ifndef MODE_SORT_STEP + +shared vec2 g_LDS[SORT_SIZE]; + +#endif + +layout(set = 1, binding = 0, std430) restrict buffer SortBuffer { + vec2 data[]; +} +sort_buffer; + +layout(push_constant, binding = 0, std430) uniform Params { + uint total_elements; + uint pad[3]; + ivec4 job_params; +} +params; + +void main() { +#ifdef MODE_SORT_BLOCK + + uvec3 Gid = gl_WorkGroupID; + uvec3 DTid = gl_GlobalInvocationID; + uvec3 GTid = gl_LocalInvocationID; + uint GI = gl_LocalInvocationIndex; + + int GlobalBaseIndex = int((Gid.x * SORT_SIZE) + GTid.x); + int LocalBaseIndex = int(GI); + int numElementsInThreadGroup = int(min(SORT_SIZE, params.total_elements - (Gid.x * SORT_SIZE))); + + // Load shared data + + int i; + for (i = 0; i < 2 * ITERATIONS; ++i) { + if (GI + i * NUM_THREADS < numElementsInThreadGroup) + g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS]; + } + + groupMemoryBarrier(); + barrier(); + + // Bitonic sort + for (int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2) { + for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) { + for (i = 0; i < ITERATIONS; ++i) { + int tmp_index = int(GI + NUM_THREADS * i); + int index_low = tmp_index & (nMergeSubSize - 1); + int index_high = 2 * (tmp_index - index_low); + int index = index_high + index_low; + + int nSwapElem = nMergeSubSize == nMergeSize >> 1 ? index_high + (2 * nMergeSubSize - 1) - index_low : index_high + nMergeSubSize + index_low; + if (nSwapElem < numElementsInThreadGroup) { + vec2 a = g_LDS[index]; + vec2 b = g_LDS[nSwapElem]; + + if (a.x > b.x) { + g_LDS[index] = b; + g_LDS[nSwapElem] = a; + } + } + groupMemoryBarrier(); + barrier(); + } + } + } + + // Store shared data + for (i = 0; i < 2 * ITERATIONS; ++i) { + if (GI + i * NUM_THREADS < numElementsInThreadGroup) { + sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS]; + } + } + +#endif + +#ifdef MODE_SORT_STEP + + uvec3 Gid = gl_WorkGroupID; + uvec3 GTid = gl_LocalInvocationID; + + ivec4 tgp; + + tgp.x = int(Gid.x) * 256; + tgp.y = 0; + tgp.z = int(params.total_elements); + tgp.w = min(512, max(0, tgp.z - int(Gid.x) * 512)); + + uint localID = int(tgp.x) + GTid.x; // calculate threadID within this sortable-array + + uint index_low = localID & (params.job_params.x - 1); + uint index_high = 2 * (localID - index_low); + + uint index = tgp.y + index_high + index_low; + uint nSwapElem = tgp.y + index_high + params.job_params.y + params.job_params.z * index_low; + + if (nSwapElem < tgp.y + tgp.z) { + vec2 a = sort_buffer.data[index]; + vec2 b = sort_buffer.data[nSwapElem]; + + if (a.x > b.x) { + sort_buffer.data[index] = b; + sort_buffer.data[nSwapElem] = a; + } + } + +#endif + +#ifdef MODE_SORT_INNER + + uvec3 Gid = gl_WorkGroupID; + uvec3 DTid = gl_GlobalInvocationID; + uvec3 GTid = gl_LocalInvocationID; + uint GI = gl_LocalInvocationIndex; + + ivec4 tgp; + + tgp.x = int(Gid.x * 256); + tgp.y = 0; + tgp.z = int(params.total_elements.x); + tgp.w = int(min(512, max(0, params.total_elements - Gid.x * 512))); + + int GlobalBaseIndex = int(tgp.y + tgp.x * 2 + GTid.x); + int LocalBaseIndex = int(GI); + int i; + + // Load shared data + for (i = 0; i < 2; ++i) { + if (GI + i * NUM_THREADS < tgp.w) + g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS]; + } + + groupMemoryBarrier(); + barrier(); + + // sort threadgroup shared memory + for (int nMergeSubSize = SORT_SIZE >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) { + int tmp_index = int(GI); + int index_low = tmp_index & (nMergeSubSize - 1); + int index_high = 2 * (tmp_index - index_low); + int index = index_high + index_low; + + int nSwapElem = index_high + nMergeSubSize + index_low; + + if (nSwapElem < tgp.w) { + vec2 a = g_LDS[index]; + vec2 b = g_LDS[nSwapElem]; + + if (a.x > b.x) { + g_LDS[index] = b; + g_LDS[nSwapElem] = a; + } + } + groupMemoryBarrier(); + barrier(); + } + + // Store shared data + for (i = 0; i < 2; ++i) { + if (GI + i * NUM_THREADS < tgp.w) { + sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS]; + } + } + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/specular_merge.glsl b/servers/rendering/renderer_rd/shaders/specular_merge.glsl new file mode 100644 index 0000000000..0b8f406213 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/specular_merge.glsl @@ -0,0 +1,53 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) out vec2 uv_interp; + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + uv_interp = base_arr[gl_VertexIndex]; + + gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec2 uv_interp; + +layout(set = 0, binding = 0) uniform sampler2D specular; + +#ifdef MODE_SSR + +layout(set = 1, binding = 0) uniform sampler2D ssr; + +#endif + +#ifdef MODE_MERGE + +layout(set = 2, binding = 0) uniform sampler2D diffuse; + +#endif + +layout(location = 0) out vec4 frag_color; + +void main() { + frag_color.rgb = texture(specular, uv_interp).rgb; + frag_color.a = 0.0; +#ifdef MODE_SSR + + vec4 ssr_color = texture(ssr, uv_interp); + frag_color.rgb = mix(frag_color.rgb, ssr_color.rgb, ssr_color.a); +#endif + +#ifdef MODE_MERGE + frag_color += texture(diffuse, uv_interp); +#endif + //added using additive blend +} diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/ssao.glsl new file mode 100644 index 0000000000..231f8f91ec --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao.glsl @@ -0,0 +1,486 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +VERSION_DEFINES + +#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 + +#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32) +const vec4 sample_pattern[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = { + vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827), + vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283), + vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055), + vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, -1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520), + vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800), + vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046), + vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), vec4(-0.91974425, 0.05403209, 0.600000, 0.062246), + vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244) +}; + +// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors +// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples) +const int num_taps[5] = { 3, 5, 12, 0, 0 }; + +#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar +#define SSAO_TILT_SAMPLES_AMOUNT (0.4) +// +#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar +#define SSAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) +// +#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) // to disable simply set to 99 or similar +#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges +// +#define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use detail; to disable simply set to 99 or similar +// +#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too +#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically +// +// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for +// testing purposes, it will not yield performance gains (or correct results) +#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) +// +#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1) + +#define SSAO_MAX_TAPS 32 +#define SSAO_MAX_REF_TAPS 512 +#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 +#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS - SSAO_ADAPTIVE_TAP_BASE_COUNT) +#define SSAO_DEPTH_MIP_LEVELS 4 + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps; +layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal; +layout(set = 0, binding = 2) uniform Constants { //get into a lower set + vec4 rotation_matrices[20]; +} +constants; + +#ifdef ADAPTIVE +layout(rg8, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssao; +layout(set = 1, binding = 1) uniform sampler2D source_importance; +layout(set = 1, binding = 2, std430) buffer Counter { + uint sum; +} +counter; +#endif + +layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; + +// This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead +layout(push_constant, binding = 3, std430) uniform Params { + ivec2 screen_size; + int pass; + int quality; + + vec2 half_screen_pixel_size; + int size_multiplier; + float detail_intensity; + + vec2 NDC_to_view_mul; + vec2 NDC_to_view_add; + + vec2 pad2; + vec2 half_screen_pixel_size_x025; + + float radius; + float intensity; + float shadow_power; + float shadow_clamp; + + float fade_out_mul; + float fade_out_add; + float horizon_angle_threshold; + float inv_radius_near_limit; + + bool is_orthogonal; + float neg_inv_radius; + float load_counter_avg_div; + float adaptive_sample_limit; + + ivec2 pass_coord_offset; + vec2 pass_uv_offset; +} +params; + +// packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! +float pack_edges(vec4 p_edgesLRTB) { + p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05); + return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0)); +} + +vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) { + if (params.is_orthogonal) { + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth); + } else { + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth); + } +} + +// calculate effect radius and fit our screen sampling pattern inside it +void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) { + r_radius = params.radius; + + // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts + const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2; + + r_radius *= too_close_limit; + + // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence + r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x; + + // used to calculate falloff (both for AO samples and per-sample weights) + r_fallof_sq = -1.0 / (r_radius * r_radius); +} + +vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float p_bottom_z) { + // slope-sensitive depth-based edge detection + vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z; + vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz; + edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted)); + return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0); +} + +vec3 decode_normal(vec3 p_encoded_normal) { + vec3 normal = p_encoded_normal * 2.0 - 1.0; + return normal; +} + +vec3 load_normal(ivec2 p_pos) { + vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz; + encoded_normal.z = 1.0 - encoded_normal.z; + return decode_normal(encoded_normal); +} + +vec3 load_normal(ivec2 p_pos, ivec2 p_offset) { + vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz; + encoded_normal.z = 1.0 - encoded_normal.z; + return decode_normal(encoded_normal); +} + +// all vectors in viewspace +float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) { + float length_sq = dot(p_hit_delta, p_hit_delta); + float NdotD = dot(p_pixel_normal, p_hit_delta) / sqrt(length_sq); + + float falloff_mult = max(0.0, length_sq * p_fallof_sq + 1.0); + + return max(0, NdotD - params.horizon_angle_threshold) * falloff_mult; +} + +void SSAO_tap_inner(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) { + // get depth at sample + float viewspace_sample_z = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x; + + // convert to viewspace + vec3 hit_pos = NDC_to_view_space(p_sampling_uv.xy, viewspace_sample_z).xyz; + vec3 hit_delta = hit_pos - p_pix_center_pos; + + float obscurance = calculate_pixel_obscurance(p_pixel_normal, hit_delta, p_fallof_sq); + float weight = 1.0; + + if (p_quality_level >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) { + float reduct = max(0, -hit_delta.z); + reduct = clamp(reduct * params.neg_inv_radius + 2.0, 0.0, 1.0); + weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT); + } + weight *= p_weight_mod; + r_obscurance_sum += obscurance * weight; + r_weight_sum += weight; +} + +void SSAOTap(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const int p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) { + vec2 sample_offset; + float sample_pow_2_len; + + // patterns + { + vec4 new_sample = sample_pattern[p_tap_index]; + sample_offset = new_sample.xy * p_rot_scale; + sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) ); + p_weight_mod *= new_sample.z; + } + + // snap to pixel center (more correct obscurance math, avoids artifacts) + sample_offset = round(sample_offset); + + // calculate MIP based on the sample distance from the centre, similar to as described + // in http://graphics.cs.williams.edu/papers/SAOHPG12/. + float mip_level = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (sample_pow_2_len + p_mip_offset); + + vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos; + + SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); + + // for the second tap, just use the mirrored offset + vec2 sample_offset_mirrored_uv = -sample_offset; + + // tilt the second set of samples so that the disk is effectively rotated by the normal + // effective at removing one set of artifacts, but too expensive for lower quality settings + if (p_quality_level >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) { + float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy); + sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy; + sample_offset_mirrored_uv = round(sample_offset_mirrored_uv); + } + + // snap to pixel center (more correct obscurance math, avoids artifacts) + vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos; + + SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); +} + +void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) { + vec2 pos_rounded = trunc(p_pos); + uvec2 upos = uvec2(pos_rounded); + + const int number_of_taps = (p_adaptive_base) ? (SSAO_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]); + float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z; + + vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass)); + vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass)); + + // get this pixel's viewspace depth + pix_z = valuesUL.y; + + // get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0) + pix_left_z = valuesUL.x; + pix_top_z = valuesUL.z; + pix_right_z = valuesBR.z; + pix_bottom_z = valuesBR.x; + + vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025; + vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z); + + // Load this pixel's viewspace normal + uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy; + vec3 pixel_normal = load_normal(ivec2(full_res_coord)); + + const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy; + + float pixel_lookup_radius; + float fallof_sq; + + // calculate effect radius and fit our screen sampling pattern inside it + float viewspace_radius; + calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq); + + // calculate samples rotation/scaling + mat2 rot_scale_matrix; + uint pseudo_random_index; + + { + vec4 rotation_scale; + // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead + if (!p_adaptive_base && (p_quality_level >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) { + float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - normalized_screen_pos.y)); + near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0); + pixel_lookup_radius *= near_screen_border; + } + + // load & update pseudo-random rotation matrix + pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5; + rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index]; + rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius); + } + + // the main obscurance & sample weight storage + float obscurance_sum = 0.0; + float weight_sum = 0.0; + + // edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge) + vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0); + + // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats + pix_center_pos *= 0.9992; + + if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z); + } + + // adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection) + if (!p_adaptive_base && (p_quality_level >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET)) { + // disable in case of quality level 4 (reference) + if (p_quality_level != 4) { + //approximate neighbouring pixels positions (actually just deltas or "positions - pix_center_pos" ) + vec3 normalized_viewspace_dir = vec3(pix_center_pos.xy / pix_center_pos.zz, 1.0); + vec3 pixel_left_delta = vec3(-pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_left_z - pix_center_pos.z); + vec3 pixel_right_delta = vec3(+pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_right_z - pix_center_pos.z); + vec3 pixel_top_delta = vec3(0.0, -pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_top_z - pix_center_pos.z); + vec3 pixel_bottom_delta = vec3(0.0, +pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_bottom_z - pix_center_pos.z); + + const float range_reduction = 4.0f; // this is to avoid various artifacts + const float modified_fallof_sq = range_reduction * fallof_sq; + + vec4 additional_obscurance; + additional_obscurance.x = calculate_pixel_obscurance(pixel_normal, pixel_left_delta, modified_fallof_sq); + additional_obscurance.y = calculate_pixel_obscurance(pixel_normal, pixel_right_delta, modified_fallof_sq); + additional_obscurance.z = calculate_pixel_obscurance(pixel_normal, pixel_top_delta, modified_fallof_sq); + additional_obscurance.w = calculate_pixel_obscurance(pixel_normal, pixel_bottom_delta, modified_fallof_sq); + + obscurance_sum += params.detail_intensity * dot(additional_obscurance, edgesLRTB); + } + } + + // Sharp normals also create edges - but this adds to the cost as well + if (!p_adaptive_base && (p_quality_level >= SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + vec3 neighbour_normal_left = load_normal(ivec2(full_res_coord), ivec2(-2, 0)); + vec3 neighbour_normal_right = load_normal(ivec2(full_res_coord), ivec2(2, 0)); + vec3 neighbour_normal_top = load_normal(ivec2(full_res_coord), ivec2(0, -2)); + vec3 neighbour_normal_bottom = load_normal(ivec2(full_res_coord), ivec2(0, 2)); + + const float dot_threshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD; + + vec4 normal_edgesLRTB; + normal_edgesLRTB.x = clamp((dot(pixel_normal, neighbour_normal_left) + dot_threshold), 0.0, 1.0); + normal_edgesLRTB.y = clamp((dot(pixel_normal, neighbour_normal_right) + dot_threshold), 0.0, 1.0); + normal_edgesLRTB.z = clamp((dot(pixel_normal, neighbour_normal_top) + dot_threshold), 0.0, 1.0); + normal_edgesLRTB.w = clamp((dot(pixel_normal, neighbour_normal_bottom) + dot_threshold), 0.0, 1.0); + + edgesLRTB *= normal_edgesLRTB; + } + + const float global_mip_offset = SSAO_DEPTH_MIPS_GLOBAL_OFFSET; + float mip_offset = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixel_lookup_radius) + global_mip_offset); + + // Used to tilt the second set of samples so that the disk is effectively rotated by the normal + // effective at removing one set of artifacts, but too expensive for lower quality settings + vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y); + float norm_xy_length = length(norm_xy); + norm_xy /= vec2(norm_xy_length, -norm_xy_length); + norm_xy_length *= SSAO_TILT_SAMPLES_AMOUNT; + + // standard, non-adaptive approach + if ((p_quality_level != 3) || p_adaptive_base) { + for (int i = 0; i < number_of_taps; i++) { + SSAOTap(p_quality_level, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length); + } + } +#ifdef ADAPTIVE + else { + // add new ones if needed + vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy; + float importance = textureLod(source_importance, full_res_uv, 0.0).x; + + // this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixel regardless of importance + obscurance_sum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / float(SSAO_MAX_TAPS)) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / float(SSAO_MAX_TAPS)); + + // load existing base values + vec2 base_values = imageLoad(source_ssao, ivec3(upos, params.pass)).xy; + weight_sum += base_values.y * float(SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0); + obscurance_sum += (base_values.x) * weight_sum; + + // increase importance around edges + float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0)); + + float avg_total_importance = float(counter.sum) * params.load_counter_avg_div; + + float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0); + importance *= importance_limiter; + + float additional_sample_count = SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance; + + const float blend_range = 3.0; + const float blend_range_inv = 1.0 / blend_range; + + additional_sample_count += 0.5; + uint additional_samples = uint(additional_sample_count); + uint additional_samples_to = min(SSAO_MAX_TAPS, additional_samples + SSAO_ADAPTIVE_TAP_BASE_COUNT); + + for (uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) { + additional_sample_count -= 1.0f; + float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0); + SSAOTap(p_quality_level, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length); + } + } +#endif + + // early out for adaptive base - just output weight (used for the next pass) + if (p_adaptive_base) { + float obscurance = obscurance_sum / weight_sum; + + r_shadow_term = obscurance; + r_edges = vec4(0.0); + r_weight = weight_sum; + return; + } + + // calculate weighted average + float obscurance = obscurance_sum / weight_sum; + + // calculate fadeout (1 close, gradient, 0 far) + float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0); + + // Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one) + if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + // when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts + float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0); + + fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0); + } + + // strength + obscurance = params.intensity * obscurance; + + // clamp + obscurance = min(obscurance, params.shadow_clamp); + + // fadeout + obscurance *= fade_out; + + // conceptually switch to occlusion with the meaning being visibility (grows with visibility, occlusion == 1 implies full visibility), + // to be in line with what is more commonly used. + float occlusion = 1.0 - obscurance; + + // modify the gradient + // note: this cannot be moved to a later pass because of loss of precision after storing in the render target + occlusion = pow(clamp(occlusion, 0.0, 1.0), params.shadow_power); + + // outputs! + r_shadow_term = occlusion; // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit) + r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc. + r_weight = weight_sum; +} + +void main() { + float out_shadow_term; + float out_weight; + vec4 out_edges; + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + + vec2 uv = vec2(gl_GlobalInvocationID) + vec2(0.5); +#ifdef SSAO_BASE + generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, true); + + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, out_weight / (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0), 0.0, 0.0)); +#else + generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, false); // pass in quality levels + if (params.quality == 0) { + out_edges = vec4(1.0); + } + + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, pack_edges(out_edges), 0.0, 0.0)); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl new file mode 100644 index 0000000000..510a777048 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl @@ -0,0 +1,154 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2D source_ssao; + +layout(rg8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; + +layout(push_constant, binding = 1, std430) uniform Params { + float edge_sharpness; + float pad; + vec2 half_screen_pixel_size; +} +params; + +vec4 unpack_edges(float p_packed_val) { + uint packed_val = uint(p_packed_val * 255.5); + vec4 edgesLRTB; + edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; + edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; + edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; + edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; + + return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0); +} + +void add_sample(float p_ssao_value, float p_edge_value, inout float r_sum, inout float r_sum_weight) { + float weight = p_edge_value; + + r_sum += (weight * p_ssao_value); + r_sum_weight += weight; +} + +#ifdef MODE_WIDE +vec2 sample_blurred_wide(vec2 p_coord) { + vec2 vC = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 0)).xy; + vec2 vL = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(-2, 0)).xy; + vec2 vT = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, -2)).xy; + vec2 vR = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(2, 0)).xy; + vec2 vB = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 2)).xy; + + float packed_edges = vC.y; + vec4 edgesLRTB = unpack_edges(packed_edges); + edgesLRTB.x *= unpack_edges(vL.y).y; + edgesLRTB.z *= unpack_edges(vT.y).w; + edgesLRTB.y *= unpack_edges(vR.y).x; + edgesLRTB.w *= unpack_edges(vB.y).z; + + float ssao_value = vC.x; + float ssao_valueL = vL.x; + float ssao_valueT = vT.x; + float ssao_valueR = vR.x; + float ssao_valueB = vB.x; + + float sum_weight = 0.8f; + float sum = ssao_value * sum_weight; + + add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight); + add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight); + add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight); + add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight); + + float ssao_avg = sum / sum_weight; + + ssao_value = ssao_avg; + + return vec2(ssao_value, packed_edges); +} +#endif + +#ifdef MODE_SMART +vec2 sample_blurred(vec3 p_pos, vec2 p_coord) { + float packed_edges = texelFetch(source_ssao, ivec2(p_pos.xy), 0).y; + vec4 edgesLRTB = unpack_edges(packed_edges); + + vec4 valuesUL = textureGather(source_ssao, vec2(p_coord - params.half_screen_pixel_size * 0.5)); + vec4 valuesBR = textureGather(source_ssao, vec2(p_coord + params.half_screen_pixel_size * 0.5)); + + float ssao_value = valuesUL.y; + float ssao_valueL = valuesUL.x; + float ssao_valueT = valuesUL.z; + float ssao_valueR = valuesBR.z; + float ssao_valueB = valuesBR.x; + + float sum_weight = 0.5; + float sum = ssao_value * sum_weight; + + add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight); + add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight); + + add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight); + add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight); + + float ssao_avg = sum / sum_weight; + + ssao_value = ssao_avg; + + return vec2(ssao_value, packed_edges); +} +#endif + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + +#ifdef MODE_NON_SMART + + vec2 halfPixel = params.half_screen_pixel_size * 0.5f; + + vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size; + + vec2 centre = textureLod(source_ssao, vec2(uv), 0.0).xy; + + vec4 vals; + vals.x = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x * 3, -halfPixel.y)), 0.0).x; + vals.y = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x, -halfPixel.y * 3)), 0.0).x; + vals.z = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x, +halfPixel.y * 3)), 0.0).x; + vals.w = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x * 3, +halfPixel.y)), 0.0).x; + + vec2 sampled = vec2(dot(vals, vec4(0.2)) + centre.x * 0.2, centre.y); + +#else +#ifdef MODE_SMART + vec2 sampled = sample_blurred(vec3(gl_GlobalInvocationID), (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); +#else // MODE_WIDE + vec2 sampled = sample_blurred_wide((vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); +#endif + +#endif + imageStore(dest_image, ivec2(ssC), vec4(sampled, 0.0, 0.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl new file mode 100644 index 0000000000..cb2d31f70d --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl @@ -0,0 +1,206 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 1, std430) uniform Params { + vec2 pixel_size; + float z_far; + float z_near; + bool orthogonal; + float radius_sq; + uvec2 pad; +} +params; + +layout(set = 0, binding = 0) uniform sampler2D source_depth; + +layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_image0; //rename +#ifdef GENERATE_MIPS +layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray dest_image1; +layout(r16f, set = 2, binding = 1) uniform restrict writeonly image2DArray dest_image2; +layout(r16f, set = 2, binding = 2) uniform restrict writeonly image2DArray dest_image3; +#endif + +vec4 screen_space_to_view_space_depth(vec4 p_depth) { + if (params.orthogonal) { + vec4 depth = p_depth * 2.0 - 1.0; + return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } + + float depth_linearize_mul = params.z_near; + float depth_linearize_add = params.z_far; + + // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" + + // Set your depth_linearize_mul and depth_linearize_add to: + // depth_linearize_mul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); + // depth_linearize_add = cameraClipFar / ( cameraClipFar - cameraClipNear ); + + return depth_linearize_mul / (depth_linearize_add - p_depth); +} + +float screen_space_to_view_space_depth(float p_depth) { + if (params.orthogonal) { + float depth = p_depth * 2.0 - 1.0; + return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / (2.0 * params.z_far); + } + + float depth_linearize_mul = params.z_near; + float depth_linearize_add = params.z_far; + + return depth_linearize_mul / (depth_linearize_add - p_depth); +} + +#ifdef GENERATE_MIPS + +shared float depth_buffer[4][8][8]; + +float mip_smart_average(vec4 p_depths) { + float closest = min(min(p_depths.x, p_depths.y), min(p_depths.z, p_depths.w)); + float fallof_sq = -1.0f / params.radius_sq; + vec4 dists = p_depths - closest.xxxx; + vec4 weights = clamp(dists * dists * fallof_sq + 1.0, 0.0, 1.0); + return dot(weights, p_depths) / dot(weights, vec4(1.0, 1.0, 1.0, 1.0)); +} + +void prepare_depths_and_mips(vec4 p_samples, uvec2 p_output_coord, uvec2 p_gtid) { + p_samples = screen_space_to_view_space_depth(p_samples); + + depth_buffer[0][p_gtid.x][p_gtid.y] = p_samples.w; + depth_buffer[1][p_gtid.x][p_gtid.y] = p_samples.z; + depth_buffer[2][p_gtid.x][p_gtid.y] = p_samples.x; + depth_buffer[3][p_gtid.x][p_gtid.y] = p_samples.y; + + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 0), vec4(p_samples.w)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 1), vec4(p_samples.z)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 2), vec4(p_samples.x)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 3), vec4(p_samples.y)); + + uint depth_array_index = 2 * (p_gtid.y % 2) + (p_gtid.x % 2); + uvec2 depth_array_offset = ivec2(p_gtid.x % 2, p_gtid.y % 2); + ivec2 buffer_coord = ivec2(p_gtid) - ivec2(depth_array_offset); + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + // if (still_alive) <-- all threads alive here + { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 1]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 1]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image1, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; + } + + bool still_alive = p_gtid.x % 4 == depth_array_offset.x && p_gtid.y % 4 == depth_array_offset.y; + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 2]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 2]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image2, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; + } + + still_alive = p_gtid.x % 8 == depth_array_offset.x && depth_array_offset.y % 8 == depth_array_offset.y; + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 4]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 4]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image3, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + } +} +#else +#ifndef USE_HALF_BUFFERS +void prepare_depths(vec4 p_samples, uvec2 p_tid) { + p_samples = screen_space_to_view_space_depth(p_samples); + + imageStore(dest_image0, ivec3(p_tid, 0), vec4(p_samples.w)); + imageStore(dest_image0, ivec3(p_tid, 1), vec4(p_samples.z)); + imageStore(dest_image0, ivec3(p_tid, 2), vec4(p_samples.x)); + imageStore(dest_image0, ivec3(p_tid, 3), vec4(p_samples.y)); +} +#endif +#endif + +void main() { +#ifdef USE_HALF_BUFFERS +#ifdef USE_HALF_SIZE + float sample_00 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 0, 4 * gl_GlobalInvocationID.y + 0), 0).x; + float sample_11 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 2, 4 * gl_GlobalInvocationID.y + 2), 0).x; +#else + float sample_00 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 0, 2 * gl_GlobalInvocationID.y + 0), 0).x; + float sample_11 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 1, 2 * gl_GlobalInvocationID.y + 1), 0).x; +#endif + sample_00 = screen_space_to_view_space_depth(sample_00); + sample_11 = screen_space_to_view_space_depth(sample_11); + + imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 0), vec4(sample_00)); + imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 3), vec4(sample_11)); +#else //!USE_HALF_BUFFERS +#ifdef USE_HALF_SIZE + ivec2 depth_buffer_coord = 4 * ivec2(gl_GlobalInvocationID.xy); + ivec2 output_coord = ivec2(gl_GlobalInvocationID); + + vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; + vec4 samples; + samples.x = textureLodOffset(source_depth, uv, 0, ivec2(0, 2)).x; + samples.y = textureLodOffset(source_depth, uv, 0, ivec2(2, 2)).x; + samples.z = textureLodOffset(source_depth, uv, 0, ivec2(2, 0)).x; + samples.w = textureLodOffset(source_depth, uv, 0, ivec2(0, 0)).x; +#else + ivec2 depth_buffer_coord = 2 * ivec2(gl_GlobalInvocationID.xy); + ivec2 output_coord = ivec2(gl_GlobalInvocationID); + + vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; + vec4 samples = textureGather(source_depth, uv); +#endif +#ifdef GENERATE_MIPS + prepare_depths_and_mips(samples, output_coord, gl_LocalInvocationID.xy); +#else + prepare_depths(samples, gl_GlobalInvocationID.xy); +#endif +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl new file mode 100644 index 0000000000..6aa7624261 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl @@ -0,0 +1,126 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#ifdef GENERATE_MAP +layout(set = 0, binding = 0) uniform sampler2DArray source_ssao; +#else +layout(set = 0, binding = 0) uniform sampler2D source_importance; +#endif +layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; + +#ifdef PROCESS_MAPB +layout(set = 2, binding = 0, std430) buffer Counter { + uint sum; +} +counter; +#endif + +layout(push_constant, binding = 1, std430) uniform Params { + vec2 half_screen_pixel_size; + float intensity; + float power; +} +params; + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + +#ifdef GENERATE_MAP + // importance map stuff + uvec2 base_position = ssC * 2; + + vec2 base_uv = (vec2(base_position) + vec2(0.5f, 0.5f)) * params.half_screen_pixel_size; + + float avg = 0.0; + float minV = 1.0; + float maxV = 0.0; + for (int i = 0; i < 4; i++) { + vec4 vals = textureGather(source_ssao, vec3(base_uv, i)); + + // apply the same modifications that would have been applied in the main shader + vals = params.intensity * vals; + + vals = 1 - vals; + + vals = pow(clamp(vals, 0.0, 1.0), vec4(params.power)); + + avg += dot(vec4(vals.x, vals.y, vals.z, vals.w), vec4(1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0)); + + maxV = max(maxV, max(max(vals.x, vals.y), max(vals.z, vals.w))); + minV = min(minV, min(min(vals.x, vals.y), min(vals.z, vals.w))); + } + + float min_max_diff = maxV - minV; + + imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.8))); +#endif + +#ifdef PROCESS_MAPA + vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; + + float centre = textureLod(source_importance, uv, 0.0).x; + + vec2 half_pixel = params.half_screen_pixel_size; + + vec4 vals; + vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, -half_pixel.y), 0.0).x; + vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3), 0.0).x; + vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, +half_pixel.y), 0.0).x; + vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3), 0.0).x; + + float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); + + imageStore(dest_image, ssC, vec4(avg)); +#endif + +#ifdef PROCESS_MAPB + vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; + + float centre = textureLod(source_importance, uv, 0.0).x; + + vec2 half_pixel = params.half_screen_pixel_size; + + vec4 vals; + vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3), 0.0).x; + vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, -half_pixel.y), 0.0).x; + vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3), 0.0).x; + vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, +half_pixel.y), 0.0).x; + + float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); + + imageStore(dest_image, ssC, vec4(avg)); + + // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel + uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5); + + // save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9 + if (((ssC.x % 3) + (ssC.y % 3)) == 0) { + atomicAdd(counter.sum, sum); + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl new file mode 100644 index 0000000000..4fdf334aa5 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl @@ -0,0 +1,119 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba8, set = 0, binding = 0) uniform restrict writeonly image2D dest_image; +layout(set = 1, binding = 0) uniform sampler2DArray source_texture; + +layout(push_constant, binding = 1, std430) uniform Params { + float inv_sharpness; + uint size_modifier; + vec2 pixel_size; +} +params; + +vec4 unpack_edges(float p_packed_val) { + uint packed_val = uint(p_packed_val * 255.5); + vec4 edgesLRTB; + edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; + edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; + edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; + edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; + + return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0); +} + +void main() { + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, ivec2(1.0 / params.pixel_size)))) { //too large, do nothing + return; + } + +#ifdef MODE_SMART + float ao; + uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy); + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; + + // calculate index in the four deinterleaved source array texture + int mx = int(pix_pos.x % 2); + int my = int(pix_pos.y % 2); + int index_center = mx + my * 2; // center index + int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal + int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical + int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal + + vec2 center_val = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0).xy; + + ao = center_val.x; + + vec4 edgesLRTB = unpack_edges(center_val.y); + + // convert index shifts to sampling offsets + float fmx = float(mx); + float fmy = float(my); + + // in case of an edge, push sampling offsets away from the edge (towards pixel center) + float fmxe = (edgesLRTB.y - edgesLRTB.x); + float fmye = (edgesLRTB.w - edgesLRTB.z); + + // calculate final sampling offsets and sample using bilinear filter + vec2 uv_horizontal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size; + float ao_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0).x; + vec2 uv_vertical = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size; + float ao_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0).x; + vec2 uv_diagonal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size; + float ao_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0).x; + + // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 + vec4 blendWeights; + blendWeights.x = 1.0; + blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5; + blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5; + blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5; + + // calculate weighted average + float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0)); + ao = dot(vec4(ao, ao_horizontal, ao_vertical, ao_diagonal), blendWeights) / blendWeightsSum; + + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(ao)); +#else // !MODE_SMART + + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; +#ifdef MODE_HALF + float a = textureLod(source_texture, vec3(uv, 0), 0.0).x; + float d = textureLod(source_texture, vec3(uv, 3), 0.0).x; + float avg = (a + d) * 0.5; + +#else + float a = textureLod(source_texture, vec3(uv, 0), 0.0).x; + float b = textureLod(source_texture, vec3(uv, 1), 0.0).x; + float c = textureLod(source_texture, vec3(uv, 2), 0.0).x; + float d = textureLod(source_texture, vec3(uv, 3), 0.0).x; + float avg = (a + b + c + d) * 0.25; + +#endif + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(avg)); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl b/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl new file mode 100644 index 0000000000..88a953562f --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl @@ -0,0 +1,189 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#ifdef USE_25_SAMPLES +const int kernel_size = 13; + +const vec2 kernel[kernel_size] = vec2[]( + vec2(0.530605, 0.0), + vec2(0.0211412, 0.0208333), + vec2(0.0402784, 0.0833333), + vec2(0.0493588, 0.1875), + vec2(0.0410172, 0.333333), + vec2(0.0263642, 0.520833), + vec2(0.017924, 0.75), + vec2(0.0128496, 1.02083), + vec2(0.0094389, 1.33333), + vec2(0.00700976, 1.6875), + vec2(0.00500364, 2.08333), + vec2(0.00333804, 2.52083), + vec2(0.000973794, 3.0)); + +const vec4 skin_kernel[kernel_size] = vec4[]( + vec4(0.530605, 0.613514, 0.739601, 0), + vec4(0.0211412, 0.0459286, 0.0378196, 0.0208333), + vec4(0.0402784, 0.0657244, 0.04631, 0.0833333), + vec4(0.0493588, 0.0367726, 0.0219485, 0.1875), + vec4(0.0410172, 0.0199899, 0.0118481, 0.333333), + vec4(0.0263642, 0.0119715, 0.00684598, 0.520833), + vec4(0.017924, 0.00711691, 0.00347194, 0.75), + vec4(0.0128496, 0.00356329, 0.00132016, 1.02083), + vec4(0.0094389, 0.00139119, 0.000416598, 1.33333), + vec4(0.00700976, 0.00049366, 0.000151938, 1.6875), + vec4(0.00500364, 0.00020094, 5.28848e-005, 2.08333), + vec4(0.00333804, 7.85443e-005, 1.2945e-005, 2.52083), + vec4(0.000973794, 1.11862e-005, 9.43437e-007, 3)); + +#endif //USE_25_SAMPLES + +#ifdef USE_17_SAMPLES +const int kernel_size = 9; +const vec2 kernel[kernel_size] = vec2[]( + vec2(0.536343, 0.0), + vec2(0.0324462, 0.03125), + vec2(0.0582416, 0.125), + vec2(0.0571056, 0.28125), + vec2(0.0347317, 0.5), + vec2(0.0216301, 0.78125), + vec2(0.0144609, 1.125), + vec2(0.0100386, 1.53125), + vec2(0.00317394, 2.0)); + +const vec4 skin_kernel[kernel_size] = vec4[]( + vec4(0.536343, 0.624624, 0.748867, 0), + vec4(0.0324462, 0.0656718, 0.0532821, 0.03125), + vec4(0.0582416, 0.0659959, 0.0411329, 0.125), + vec4(0.0571056, 0.0287432, 0.0172844, 0.28125), + vec4(0.0347317, 0.0151085, 0.00871983, 0.5), + vec4(0.0216301, 0.00794618, 0.00376991, 0.78125), + vec4(0.0144609, 0.00317269, 0.00106399, 1.125), + vec4(0.0100386, 0.000914679, 0.000275702, 1.53125), + vec4(0.00317394, 0.000134823, 3.77269e-005, 2)); +#endif //USE_17_SAMPLES + +#ifdef USE_11_SAMPLES +const int kernel_size = 6; +const vec2 kernel[kernel_size] = vec2[]( + vec2(0.560479, 0.0), + vec2(0.0771802, 0.08), + vec2(0.0821904, 0.32), + vec2(0.03639, 0.72), + vec2(0.0192831, 1.28), + vec2(0.00471691, 2.0)); + +const vec4 skin_kernel[kernel_size] = vec4[]( + + vec4(0.560479, 0.669086, 0.784728, 0), + vec4(0.0771802, 0.113491, 0.0793803, 0.08), + vec4(0.0821904, 0.0358608, 0.0209261, 0.32), + vec4(0.03639, 0.0130999, 0.00643685, 0.72), + vec4(0.0192831, 0.00282018, 0.00084214, 1.28), + vec4(0.00471691, 0.000184771, 5.07565e-005, 2)); + +#endif //USE_11_SAMPLES + +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 screen_size; + float camera_z_far; + float camera_z_near; + + bool vertical; + bool orthogonal; + float unit_size; + float scale; + + float depth_scale; + uint pad[3]; +} +params; + +layout(set = 0, binding = 0) uniform sampler2D source_image; +layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; +layout(set = 2, binding = 0) uniform sampler2D source_depth; + +void do_filter(inout vec3 color_accum, inout vec3 divisor, vec2 uv, vec2 step, bool p_skin) { + // Accumulate the other samples: + for (int i = 1; i < kernel_size; i++) { + // Fetch color and depth for current sample: + vec2 offset = uv + kernel[i].y * step; + vec4 color = texture(source_image, offset); + + if (abs(color.a) < 0.001) { + break; //mix no more + } + + vec3 w; + if (p_skin) { + //skin + w = skin_kernel[i].rgb; + } else { + w = vec3(kernel[i].x); + } + + color_accum += color.rgb * w; + divisor += w; + } +} + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + + vec2 uv = (vec2(ssC) + 0.5) / vec2(params.screen_size); + + // Fetch color of current pixel: + vec4 base_color = texture(source_image, uv); + float strength = abs(base_color.a); + + if (strength > 0.0) { + vec2 dir = params.vertical ? vec2(0.0, 1.0) : vec2(1.0, 0.0); + + // Fetch linear depth of current pixel: + float depth = texture(source_depth, uv).r * 2.0 - 1.0; + float depth_scale; + + if (params.orthogonal) { + depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + depth_scale = params.unit_size; //remember depth is negative by default in OpenGL + } else { + depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + depth_scale = params.unit_size / depth; //remember depth is negative by default in OpenGL + } + + float scale = mix(params.scale, depth_scale, params.depth_scale); + + // Calculate the final step to fetch the surrounding pixels: + vec2 step = scale * dir; + step *= strength; + step /= 3.0; + // Accumulate the center sample: + + vec3 divisor; + bool skin = bool(base_color.a < 0.0); + + if (skin) { + //skin + divisor = skin_kernel[0].rgb; + } else { + divisor = vec3(kernel[0].x); + } + + vec3 color = base_color.rgb * divisor; + + do_filter(color, divisor, uv, step, skin); + do_filter(color, divisor, uv, -step, skin); + + base_color.rgb = color / divisor; + } + + imageStore(dest_image, ssC, base_color); +} diff --git a/servers/rendering/renderer_rd/shaders/tonemap.glsl b/servers/rendering/renderer_rd/shaders/tonemap.glsl new file mode 100644 index 0000000000..7de91fd541 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/tonemap.glsl @@ -0,0 +1,386 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) out vec2 uv_interp; + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + uv_interp = base_arr[gl_VertexIndex]; + gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec2 uv_interp; + +layout(set = 0, binding = 0) uniform sampler2D source_color; +layout(set = 1, binding = 0) uniform sampler2D source_auto_exposure; +layout(set = 2, binding = 0) uniform sampler2D source_glow; +#ifdef USE_1D_LUT +layout(set = 3, binding = 0) uniform sampler2D source_color_correction; +#else +layout(set = 3, binding = 0) uniform sampler3D source_color_correction; +#endif + +layout(push_constant, binding = 1, std430) uniform Params { + vec3 bcs; + bool use_bcs; + + bool use_glow; + bool use_auto_exposure; + bool use_color_correction; + uint tonemapper; + + uvec2 glow_texture_size; + float glow_intensity; + uint pad3; + + uint glow_mode; + float glow_levels[7]; + + float exposure; + float white; + float auto_exposure_grey; + uint pad2; + + vec2 pixel_size; + bool use_fxaa; + bool use_debanding; +} +params; + +layout(location = 0) out vec4 frag_color; + +#ifdef USE_GLOW_FILTER_BICUBIC +// w0, w1, w2, and w3 are the four cubic B-spline basis functions +float w0(float a) { + return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); +} + +float w1(float a) { + return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); +} + +float w2(float a) { + return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); +} + +float w3(float a) { + return (1.0f / 6.0f) * (a * a * a); +} + +// g0 and g1 are the two amplitude functions +float g0(float a) { + return w0(a) + w1(a); +} + +float g1(float a) { + return w2(a) + w3(a); +} + +// h0 and h1 are the two offset functions +float h0(float a) { + return -1.0f + w1(a) / (w0(a) + w1(a)); +} + +float h1(float a) { + return 1.0f + w3(a) / (w2(a) + w3(a)); +} + +vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) { + float lod = float(p_lod); + vec2 tex_size = vec2(params.glow_texture_size >> p_lod); + vec2 pixel_size = vec2(1.0f) / tex_size; + + uv = uv * tex_size + vec2(0.5f); + + vec2 iuv = floor(uv); + vec2 fuv = fract(uv); + + float g0x = g0(fuv.x); + float g1x = g1(fuv.x); + float h0x = h0(fuv.x); + float h1x = h1(fuv.x); + float h0y = h0(fuv.y); + float h1y = h1(fuv.y); + + vec2 p0 = (vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5f)) * pixel_size; + vec2 p1 = (vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5f)) * pixel_size; + vec2 p2 = (vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5f)) * pixel_size; + vec2 p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5f)) * pixel_size; + + return (g0(fuv.y) * (g0x * textureLod(tex, p0, lod) + g1x * textureLod(tex, p1, lod))) + + (g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod))); +} + +#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod) + +#else + +#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) textureLod(m_tex, m_uv, float(m_lod)) + +#endif + +vec3 tonemap_filmic(vec3 color, float white) { + // exposure bias: input scale (color *= bias, white *= bias) to make the brightness consistent with other tonemappers + // also useful to scale the input to the range that the tonemapper is designed for (some require very high input values) + // has no effect on the curve's general shape or visual properties + const float exposure_bias = 2.0f; + const float A = 0.22f * exposure_bias * exposure_bias; // bias baked into constants for performance + const float B = 0.30f * exposure_bias; + const float C = 0.10f; + const float D = 0.20f; + const float E = 0.01f; + const float F = 0.30f; + + vec3 color_tonemapped = ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F; + float white_tonemapped = ((white * (A * white + C * B) + D * E) / (white * (A * white + B) + D * F)) - E / F; + + return color_tonemapped / white_tonemapped; +} + +vec3 tonemap_aces(vec3 color, float white) { + const float exposure_bias = 0.85f; + const float A = 2.51f * exposure_bias * exposure_bias; + const float B = 0.03f * exposure_bias; + const float C = 2.43f * exposure_bias * exposure_bias; + const float D = 0.59f * exposure_bias; + const float E = 0.14f; + + vec3 color_tonemapped = (color * (A * color + B)) / (color * (C * color + D) + E); + float white_tonemapped = (white * (A * white + B)) / (white * (C * white + D) + E); + + return color_tonemapped / white_tonemapped; +} + +vec3 tonemap_reinhard(vec3 color, float white) { + // Ensure color values are positive. + // They can be negative in the case of negative lights, which leads to undesired behavior. + color = max(vec3(0.0), color); + + return (white * color + color) / (color * white + white); +} + +vec3 linear_to_srgb(vec3 color) { + //if going to srgb, clamp from 0 to 1. + color = clamp(color, vec3(0.0), vec3(1.0)); + const vec3 a = vec3(0.055f); + return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f))); +} + +#define TONEMAPPER_LINEAR 0 +#define TONEMAPPER_REINHARD 1 +#define TONEMAPPER_FILMIC 2 +#define TONEMAPPER_ACES 3 + +vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR, always outputs clamped [0;1] color + + if (params.tonemapper == TONEMAPPER_LINEAR) { + return color; + } else if (params.tonemapper == TONEMAPPER_REINHARD) { + return tonemap_reinhard(color, white); + } else if (params.tonemapper == TONEMAPPER_FILMIC) { + return tonemap_filmic(color, white); + } else { //aces + return tonemap_aces(color, white); + } +} + +vec3 gather_glow(sampler2D tex, vec2 uv) { // sample all selected glow levels + vec3 glow = vec3(0.0f); + + if (params.glow_levels[0] > 0.0001) { + glow += GLOW_TEXTURE_SAMPLE(tex, uv, 0).rgb * params.glow_levels[0]; + } + + if (params.glow_levels[1] > 0.0001) { + glow += GLOW_TEXTURE_SAMPLE(tex, uv, 1).rgb * params.glow_levels[1]; + } + + if (params.glow_levels[2] > 0.0001) { + glow += GLOW_TEXTURE_SAMPLE(tex, uv, 2).rgb * params.glow_levels[2]; + } + + if (params.glow_levels[3] > 0.0001) { + glow += GLOW_TEXTURE_SAMPLE(tex, uv, 3).rgb * params.glow_levels[3]; + } + + if (params.glow_levels[4] > 0.0001) { + glow += GLOW_TEXTURE_SAMPLE(tex, uv, 4).rgb * params.glow_levels[4]; + } + + if (params.glow_levels[5] > 0.0001) { + glow += GLOW_TEXTURE_SAMPLE(tex, uv, 5).rgb * params.glow_levels[5]; + } + + if (params.glow_levels[6] > 0.0001) { + glow += GLOW_TEXTURE_SAMPLE(tex, uv, 6).rgb * params.glow_levels[6]; + } + + return glow; +} + +#define GLOW_MODE_ADD 0 +#define GLOW_MODE_SCREEN 1 +#define GLOW_MODE_SOFTLIGHT 2 +#define GLOW_MODE_REPLACE 3 +#define GLOW_MODE_MIX 4 + +vec3 apply_glow(vec3 color, vec3 glow) { // apply glow using the selected blending mode + if (params.glow_mode == GLOW_MODE_ADD) { + return color + glow; + } else if (params.glow_mode == GLOW_MODE_SCREEN) { + //need color clamping + return max((color + glow) - (color * glow), vec3(0.0)); + } else if (params.glow_mode == GLOW_MODE_SOFTLIGHT) { + //need color clamping + glow = glow * vec3(0.5f) + vec3(0.5f); + + color.r = (glow.r <= 0.5f) ? (color.r - (1.0f - 2.0f * glow.r) * color.r * (1.0f - color.r)) : (((glow.r > 0.5f) && (color.r <= 0.25f)) ? (color.r + (2.0f * glow.r - 1.0f) * (4.0f * color.r * (4.0f * color.r + 1.0f) * (color.r - 1.0f) + 7.0f * color.r)) : (color.r + (2.0f * glow.r - 1.0f) * (sqrt(color.r) - color.r))); + color.g = (glow.g <= 0.5f) ? (color.g - (1.0f - 2.0f * glow.g) * color.g * (1.0f - color.g)) : (((glow.g > 0.5f) && (color.g <= 0.25f)) ? (color.g + (2.0f * glow.g - 1.0f) * (4.0f * color.g * (4.0f * color.g + 1.0f) * (color.g - 1.0f) + 7.0f * color.g)) : (color.g + (2.0f * glow.g - 1.0f) * (sqrt(color.g) - color.g))); + color.b = (glow.b <= 0.5f) ? (color.b - (1.0f - 2.0f * glow.b) * color.b * (1.0f - color.b)) : (((glow.b > 0.5f) && (color.b <= 0.25f)) ? (color.b + (2.0f * glow.b - 1.0f) * (4.0f * color.b * (4.0f * color.b + 1.0f) * (color.b - 1.0f) + 7.0f * color.b)) : (color.b + (2.0f * glow.b - 1.0f) * (sqrt(color.b) - color.b))); + return color; + } else { //replace + return glow; + } +} + +vec3 apply_bcs(vec3 color, vec3 bcs) { + color = mix(vec3(0.0f), color, bcs.x); + color = mix(vec3(0.5f), color, bcs.y); + color = mix(vec3(dot(vec3(1.0f), color) * 0.33333f), color, bcs.z); + + return color; +} +#ifdef USE_1D_LUT +vec3 apply_color_correction(vec3 color) { + color.r = texture(source_color_correction, vec2(color.r, 0.0f)).r; + color.g = texture(source_color_correction, vec2(color.g, 0.0f)).g; + color.b = texture(source_color_correction, vec2(color.b, 0.0f)).b; + return color; +} +#else +vec3 apply_color_correction(vec3 color) { + return textureLod(source_color_correction, color, 0.0).rgb; +} +#endif + +vec3 do_fxaa(vec3 color, float exposure, vec2 uv_interp) { + const float FXAA_REDUCE_MIN = (1.0 / 128.0); + const float FXAA_REDUCE_MUL = (1.0 / 8.0); + const float FXAA_SPAN_MAX = 8.0; + + vec3 rgbNW = textureLod(source_color, uv_interp + vec2(-1.0, -1.0) * params.pixel_size, 0.0).xyz * exposure; + vec3 rgbNE = textureLod(source_color, uv_interp + vec2(1.0, -1.0) * params.pixel_size, 0.0).xyz * exposure; + vec3 rgbSW = textureLod(source_color, uv_interp + vec2(-1.0, 1.0) * params.pixel_size, 0.0).xyz * exposure; + vec3 rgbSE = textureLod(source_color, uv_interp + vec2(1.0, 1.0) * params.pixel_size, 0.0).xyz * exposure; + vec3 rgbM = color; + vec3 luma = vec3(0.299, 0.587, 0.114); + float lumaNW = dot(rgbNW, luma); + float lumaNE = dot(rgbNE, luma); + float lumaSW = dot(rgbSW, luma); + float lumaSE = dot(rgbSE, luma); + float lumaM = dot(rgbM, luma); + float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE))); + float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE))); + + vec2 dir; + dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE)); + dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE)); + + float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) * + (0.25 * FXAA_REDUCE_MUL), + FXAA_REDUCE_MIN); + + float rcpDirMin = 1.0 / (min(abs(dir.x), abs(dir.y)) + dirReduce); + dir = min(vec2(FXAA_SPAN_MAX, FXAA_SPAN_MAX), + max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX), + dir * rcpDirMin)) * + params.pixel_size; + + vec3 rgbA = 0.5 * exposure * (textureLod(source_color, uv_interp + dir * (1.0 / 3.0 - 0.5), 0.0).xyz + textureLod(source_color, uv_interp + dir * (2.0 / 3.0 - 0.5), 0.0).xyz); + vec3 rgbB = rgbA * 0.5 + 0.25 * exposure * (textureLod(source_color, uv_interp + dir * -0.5, 0.0).xyz + textureLod(source_color, uv_interp + dir * 0.5, 0.0).xyz); + + float lumaB = dot(rgbB, luma); + if ((lumaB < lumaMin) || (lumaB > lumaMax)) { + return rgbA; + } else { + return rgbB; + } +} + +// From http://alex.vlachos.com/graphics/Alex_Vlachos_Advanced_VR_Rendering_GDC2015.pdf +// and https://www.shadertoy.com/view/MslGR8 (5th one starting from the bottom) +// NOTE: `frag_coord` is in pixels (i.e. not normalized UV). +vec3 screen_space_dither(vec2 frag_coord) { + // Iestyn's RGB dither (7 asm instructions) from Portal 2 X360, slightly modified for VR. + vec3 dither = vec3(dot(vec2(171.0, 231.0), frag_coord)); + dither.rgb = fract(dither.rgb / vec3(103.0, 71.0, 97.0)); + + // Subtract 0.5 to avoid slightly brightening the whole viewport. + return (dither.rgb - 0.5) / 255.0; +} + +void main() { + vec3 color = textureLod(source_color, uv_interp, 0.0f).rgb; + + // Exposure + + float exposure = params.exposure; + + if (params.use_auto_exposure) { + exposure *= 1.0 / (texelFetch(source_auto_exposure, ivec2(0, 0), 0).r / params.auto_exposure_grey); + } + + color *= exposure; + + // Early Tonemap & SRGB Conversion + + if (params.use_glow && params.glow_mode == GLOW_MODE_MIX) { + vec3 glow = gather_glow(source_glow, uv_interp); + color.rgb = mix(color.rgb, glow, params.glow_intensity); + } + + if (params.use_fxaa) { + color = do_fxaa(color, exposure, uv_interp); + } + if (params.use_debanding) { + // For best results, debanding should be done before tonemapping. + // Otherwise, we're adding noise to an already-quantized image. + color += screen_space_dither(gl_FragCoord.xy); + } + color = apply_tonemapping(color, params.white); + + color = linear_to_srgb(color); // regular linear -> SRGB conversion + + // Glow + + if (params.use_glow && params.glow_mode != GLOW_MODE_MIX) { + vec3 glow = gather_glow(source_glow, uv_interp) * params.glow_intensity; + + // high dynamic range -> SRGB + glow = apply_tonemapping(glow, params.white); + glow = linear_to_srgb(glow); + + color = apply_glow(color, glow); + } + + // Additional effects + + if (params.use_bcs) { + color = apply_bcs(color, params.bcs); + } + + if (params.use_color_correction) { + color = apply_color_correction(color); + } + + frag_color = vec4(color, 1.0f); +} diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl new file mode 100644 index 0000000000..e7ba8feb80 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl @@ -0,0 +1,702 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +/* Do not use subgroups here, seems there is not much advantage and causes glitches +#extension GL_KHR_shader_subgroup_ballot: enable +#extension GL_KHR_shader_subgroup_arithmetic: enable + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) +#define USE_SUBGROUPS +#endif +*/ + +#if defined(MODE_FOG) || defined(MODE_FILTER) + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#endif + +#if defined(MODE_DENSITY) + +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#endif + +#include "cluster_data_inc.glsl" + +#define M_PI 3.14159265359 + +layout(set = 0, binding = 1) uniform texture2D shadow_atlas; +layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas; + +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { + LightData data[]; +} +omni_lights; + +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { + LightData data[]; +} +spot_lights; + +layout(set = 0, binding = 5, std140) uniform DirectionalLights { + DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; +} +directional_lights; + +layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer { + uint data[]; +} +cluster_buffer; + +layout(set = 0, binding = 7) uniform sampler linear_sampler; + +#ifdef MODE_DENSITY +layout(rgba16f, set = 0, binding = 8) uniform restrict writeonly image3D density_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict readonly image3D fog_map; //unused +#endif + +#ifdef MODE_FOG +layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D density_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D fog_map; +#endif + +#ifdef MODE_FILTER +layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D source_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D dest_map; +#endif + +layout(set = 0, binding = 10) uniform sampler shadow_sampler; + +#define MAX_GI_PROBES 8 + +struct GIProbeData { + mat4 xform; + vec3 bounds; + float dynamic_range; + + float bias; + float normal_bias; + bool blend_ambient; + uint texture_slot; + + float anisotropy_strength; + float ambient_occlusion; + float ambient_occlusion_size; + uint mipmaps; +}; + +layout(set = 0, binding = 11, std140) uniform GIProbes { + GIProbeData data[MAX_GI_PROBES]; +} +gi_probes; + +layout(set = 0, binding = 12) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; + +layout(set = 0, binding = 13) uniform sampler linear_sampler_with_mipmaps; + +#ifdef ENABLE_SDFGI + +// SDFGI Integration on set 1 +#define SDFGI_MAX_CASCADES 8 + +struct SDFGIProbeCascadeData { + vec3 position; + float to_probe; + ivec3 probe_world_offset; + float to_cell; // 1/bounds * grid_size +}; + +layout(set = 1, binding = 0, std140) uniform SDFGI { + vec3 grid_size; + uint max_cascades; + + bool use_occlusion; + int probe_axis_size; + float probe_to_uvw; + float normal_bias; + + vec3 lightprobe_tex_pixel_size; + float energy; + + vec3 lightprobe_uv_offset; + float y_mult; + + vec3 occlusion_clamp; + uint pad3; + + vec3 occlusion_renormalize; + uint pad4; + + vec3 cascade_probe_size; + uint pad5; + + SDFGIProbeCascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; + +layout(set = 1, binding = 1) uniform texture2DArray sdfgi_ambient_texture; + +layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture; + +#endif //SDFGI + +layout(set = 0, binding = 14, std140) uniform Params { + vec2 fog_frustum_size_begin; + vec2 fog_frustum_size_end; + + float fog_frustum_end; + float z_near; + float z_far; + int filter_axis; + + ivec3 fog_volume_size; + uint directional_light_count; + + vec3 light_color; + float base_density; + + float detail_spread; + float gi_inject; + uint max_gi_probes; + uint cluster_type_size; + + vec2 screen_size; + uint cluster_shift; + uint cluster_width; + + uint max_cluster_element_count_div_32; + bool use_temporal_reprojection; + uint temporal_frame; + float temporal_blend; + + mat3x4 cam_rotation; + mat4 to_prev_view; +} +params; + +layout(set = 0, binding = 15) uniform texture3D prev_density_texture; + +float get_depth_at_pos(float cell_depth_size, int z) { + float d = float(z) * cell_depth_size + cell_depth_size * 0.5; //center of voxels + d = pow(d, params.detail_spread); + return params.fog_frustum_end * d; +} + +vec3 hash3f(uvec3 x) { + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + return vec3(x & 0xFFFFF) / vec3(float(0xFFFFF)); +} + +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + +#define TEMPORAL_FRAMES 16 + +const vec3 halton_map[TEMPORAL_FRAMES] = vec3[]( + vec3(0.5, 0.33333333, 0.2), + vec3(0.25, 0.66666667, 0.4), + vec3(0.75, 0.11111111, 0.6), + vec3(0.125, 0.44444444, 0.8), + vec3(0.625, 0.77777778, 0.04), + vec3(0.375, 0.22222222, 0.24), + vec3(0.875, 0.55555556, 0.44), + vec3(0.0625, 0.88888889, 0.64), + vec3(0.5625, 0.03703704, 0.84), + vec3(0.3125, 0.37037037, 0.08), + vec3(0.8125, 0.7037037, 0.28), + vec3(0.1875, 0.14814815, 0.48), + vec3(0.6875, 0.48148148, 0.68), + vec3(0.4375, 0.81481481, 0.88), + vec3(0.9375, 0.25925926, 0.12), + vec3(0.03125, 0.59259259, 0.32)); + +void main() { + vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); + +#ifdef MODE_DENSITY + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + if (any(greaterThanEqual(pos, params.fog_volume_size))) { + return; //do not compute + } + + vec3 posf = vec3(pos); + + //posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0; + + vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels + + uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); + + vec3 view_pos; + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + + vec4 reprojected_density = vec4(0.0); + float reproject_amount = 0.0; + + if (params.use_temporal_reprojection) { + vec3 prev_view = (params.to_prev_view * vec4(view_pos, 1.0)).xyz; + //undo transform into prev view + prev_view.y = -prev_view.y; + //z back to unit size + prev_view.z /= -params.fog_frustum_end; + //xy back to unit size + prev_view.xy /= mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(prev_view.z)); + prev_view.xy = prev_view.xy * 0.5 + 0.5; + //z back to unspread value + prev_view.z = pow(prev_view.z, 1.0 / params.detail_spread); + + if (all(greaterThan(prev_view, vec3(0.0))) && all(lessThan(prev_view, vec3(1.0)))) { + //reprojectinon fits + + reprojected_density = textureLod(sampler3D(prev_density_texture, linear_sampler), prev_view, 0.0); + reproject_amount = params.temporal_blend; + + // Since we can reproject, now we must jitter the current view pos. + // This is done here because cells that can't reproject should not jitter. + + fog_unit_pos = posf * fog_cell_size + fog_cell_size * halton_map[params.temporal_frame]; //center of voxels, offset by halton table + + screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + cluster_pos = screen_pos >> params.cluster_shift; + cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); + + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + } + } + + uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0)); + + vec3 total_light = params.light_color; + + float total_density = params.base_density; + float cell_depth_size = abs(view_pos.z - get_depth_at_pos(fog_cell_size.z, pos.z + 1)); + //compute directional lights + + for (uint i = 0; i < params.directional_light_count; i++) { + vec3 shadow_attenuation = vec3(1.0); + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -view_pos.z; + + vec4 pssm_coord; + vec3 shadow_color = directional_lights.data[i].shadow_color1.rgb; + vec3 light_dir = directional_lights.data[i].direction; + vec4 v = vec4(view_pos, 1.0); + float z_range; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.x; + + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.y; + + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.z; + + } else { + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.w; + } + + float depth = texture(sampler2D(directional_shadow_atlas, linear_sampler), pssm_coord.xy).r; + float shadow = exp(min(0.0, (depth - pssm_coord.z)) * z_range * directional_lights.data[i].shadow_volumetric_fog_fade); + + /* + //float shadow = textureProj(sampler2DShadow(directional_shadow_atlas,shadow_sampler),pssm_coord); + float shadow = 0.0; + for(float xi=-1;xi<=1;xi++) { + for(float yi=-1;yi<=1;yi++) { + vec2 ofs = vec2(xi,yi) * 1.5 * params.directional_shadow_pixel_size; + shadow += textureProj(sampler2DShadow(directional_shadow_atlas,shadow_sampler),pssm_coord + vec4(ofs,0.0,0.0)); + } + + } + + shadow /= 3.0 * 3.0; + +*/ + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, view_pos.z)); //done with negative values for performance + + shadow_attenuation = mix(shadow_color, vec3(1.0), shadow); + } + + total_light += shadow_attenuation * directional_lights.data[i].color * directional_lights.data[i].energy / M_PI; + } + + //compute lights from cluster + + { //omni lights + + uint cluster_omni_offset = cluster_offset; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; + + //if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} + + vec3 light_pos = omni_lights.data[light_index].position; + float d = distance(omni_lights.data[light_index].position, view_pos); + float shadow_attenuation = 1.0; + + if (d * omni_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); + + vec3 light = omni_lights.data[light_index].color / M_PI; + + if (omni_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); + + vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); + float shadow_len = length(splane.xyz); //need to remember shadow len from here + + splane.xyz = normalize(splane.xyz); + vec4 clamp_rect = omni_lights.data[light_index].atlas_rect; + + if (splane.z >= 0.0) { + splane.z += 1.0; + + clamp_rect.y += clamp_rect.w; + + } else { + splane.z = 1.0 - splane.z; + } + + splane.xy /= splane.z; + + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * omni_lights.data[light_index].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already + + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + } + total_light += light * attenuation * shadow_attenuation; + } + } + } + } + + { //spot lights + + uint cluster_spot_offset = cluster_offset + params.cluster_type_size; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + + //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} + + uint light_index = 32 * i + bit; + + vec3 light_pos = spot_lights.data[light_index].position; + vec3 light_rel_vec = spot_lights.data[light_index].position - view_pos; + float d = length(light_rel_vec); + float shadow_attenuation = 1.0; + + if (d * spot_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation); + + vec3 spot_dir = spot_lights.data[light_index].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[light_index].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[light_index].cone_angle)); + attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation); + + vec3 light = spot_lights.data[light_index].color / M_PI; + + if (spot_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); + + vec4 splane = (spot_lights.data[light_index].shadow_matrix * v); + splane /= splane.w; + + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / spot_lights.data[light_index].inv_radius * spot_lights.data[light_index].shadow_volumetric_fog_fade); + } + + total_light += light * attenuation * shadow_attenuation; + } + } + } + } + + vec3 world_pos = mat3(params.cam_rotation) * view_pos; + + for (uint i = 0; i < params.max_gi_probes; i++) { + vec3 position = (gi_probes.data[i].xform * vec4(world_pos, 1.0)).xyz; + + //this causes corrupted pixels, i have no idea why.. + if (all(bvec2(all(greaterThanEqual(position, vec3(0.0))), all(lessThan(position, gi_probes.data[i].bounds))))) { + position /= gi_probes.data[i].bounds; + + vec4 light = vec4(0.0); + for (uint j = 0; j < gi_probes.data[i].mipmaps; j++) { + vec4 slight = textureLod(sampler3D(gi_probe_textures[i], linear_sampler_with_mipmaps), position, float(j)); + float a = (1.0 - light.a); + light += a * slight; + } + + light.rgb *= gi_probes.data[i].dynamic_range * params.gi_inject; + + total_light += light.rgb; + } + } + + //sdfgi +#ifdef ENABLE_SDFGI + + { + float blend = -1.0; + vec3 ambient_total = vec3(0.0); + + for (uint i = 0; i < sdfgi.max_cascades; i++) { + vec3 cascade_pos = (world_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + + if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { + continue; //skip cascade + } + + vec3 base_pos = floor(cascade_pos); + ivec3 probe_base_pos = ivec3(base_pos); + + vec4 ambient_accum = vec4(0.0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(i)); + tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = cascade_pos - probe_pos; + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + float weight = trilinear.x * trilinear.y * trilinear.z; + + // Compute lightprobe occlusion + + if (sdfgi.use_occlusion) { + ivec3 occ_indexv = abs((sdfgi.cascades[i].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; + occ_pos.z += float(i); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + + occ_pos *= sdfgi.occlusion_renormalize; + float occlusion = dot(textureLod(sampler3D(sdfgi_occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= max(occlusion, 0.01); + } + + // Compute ambient texture position + + ivec3 uvw = tex_pos; + uvw.xy += offset.xy; + uvw.x += offset.z * sdfgi.probe_axis_size; + + vec3 ambient = texelFetch(sampler2DArray(sdfgi_ambient_texture, linear_sampler), uvw, 0).rgb; + + ambient_accum.rgb += ambient * weight; + ambient_accum.a += weight; + } + + if (ambient_accum.a > 0) { + ambient_accum.rgb /= ambient_accum.a; + } + ambient_total = ambient_accum.rgb; + break; + } + + total_light += ambient_total * params.gi_inject; + } + +#endif + + vec4 final_density = vec4(total_light, total_density); + + final_density = mix(final_density, reprojected_density, reproject_amount); + + imageStore(density_map, pos, final_density); +#endif + +#ifdef MODE_FOG + + ivec3 pos = ivec3(gl_GlobalInvocationID.xy, 0); + + if (any(greaterThanEqual(pos, params.fog_volume_size))) { + return; //do not compute + } + + vec4 fog_accum = vec4(0.0); + float prev_z = 0.0; + + float t = 1.0; + + for (int i = 0; i < params.fog_volume_size.z; i++) { + //compute fog position + ivec3 fog_pos = pos + ivec3(0, 0, i); + //get fog value + vec4 fog = imageLoad(density_map, fog_pos); + + //get depth at cell pos + float z = get_depth_at_pos(fog_cell_size.z, i); + //get distance from previous pos + float d = abs(prev_z - z); + //compute exinction based on beer's + float extinction = t * exp(-d * fog.a); + //compute alpha based on different of extinctions + float alpha = t - extinction; + //update extinction + t = extinction; + + fog_accum += vec4(fog.rgb * alpha, alpha); + prev_z = z; + + vec4 fog_value; + + if (fog_accum.a > 0.0) { + fog_value = vec4(fog_accum.rgb / fog_accum.a, 1.0 - t); + } else { + fog_value = vec4(0.0); + } + + imageStore(fog_map, fog_pos, fog_value); + } + +#endif + +#ifdef MODE_FILTER + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + const float gauss[7] = float[](0.071303, 0.131514, 0.189879, 0.214607, 0.189879, 0.131514, 0.071303); + + const ivec3 filter_dir[3] = ivec3[](ivec3(1, 0, 0), ivec3(0, 1, 0), ivec3(0, 0, 1)); + ivec3 offset = filter_dir[params.filter_axis]; + + vec4 accum = vec4(0.0); + for (int i = -3; i <= 3; i++) { + accum += imageLoad(source_map, clamp(pos + offset * i, ivec3(0), params.fog_volume_size - ivec3(1))) * gauss[i + 3]; + } + + imageStore(dest_map, pos, accum); + +#endif +} |