diff options
Diffstat (limited to 'servers/rendering/renderer_rd')
32 files changed, 2829 insertions, 869 deletions
diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index cbb000cb8c..a73eb3782c 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -31,6 +31,7 @@ #include "effects_rd.h" #include "core/config/project_settings.h" +#include "core/math/math_defs.h" #include "core/os/os.h" #include "thirdparty/misc/cubemap_coeffs.h" @@ -125,6 +126,33 @@ RID EffectsRD::_get_compute_uniform_set_from_texture(RID p_texture, bool p_use_m return uniform_set; } +RID EffectsRD::_get_compute_uniform_set_from_texture_and_sampler(RID p_texture, RID p_sampler) { + TextureSamplerPair tsp; + tsp.texture = p_texture; + tsp.sampler = p_sampler; + + if (texture_sampler_to_compute_uniform_set_cache.has(tsp)) { + RID uniform_set = texture_sampler_to_compute_uniform_set_cache[tsp]; + if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + return uniform_set; + } + } + + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(p_sampler); + u.ids.push_back(p_texture); + uniforms.push_back(u); + //any thing with the same configuration (one texture in binding 0 for set 0), is good + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.blur_shader.version_get_shader(ssao.blur_shader_version, 0), 0); + + texture_sampler_to_compute_uniform_set_cache[tsp] = uniform_set; + + return uniform_set; +} + RID EffectsRD::_get_compute_uniform_set_from_texture_pair(RID p_texture1, RID p_texture2, bool p_use_mipmaps) { TexturePair tp; tp.texture1 = p_texture1; @@ -951,157 +979,345 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_end(); } -void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, const Size2i &p_depth_buffer_size, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao1, bool p_half_size, RID p_ao2, RID p_upscale_buffer, float p_intensity, float p_radius, float p_bias, const CameraMatrix &p_projection, RS::EnvironmentSSAOQuality p_quality, RS::EnvironmentSSAOBlur p_blur, float p_edge_sharpness) { - //minify first - ssao.minify_push_constant.orthogonal = p_projection.is_orthogonal(); - ssao.minify_push_constant.z_near = p_projection.get_z_near(); - ssao.minify_push_constant.z_far = p_projection.get_z_far(); - ssao.minify_push_constant.pixel_size[0] = 1.0 / p_depth_buffer_size.x; - ssao.minify_push_constant.pixel_size[1] = 1.0 / p_depth_buffer_size.y; - ssao.minify_push_constant.source_size[0] = p_depth_buffer_size.x; - ssao.minify_push_constant.source_size[1] = p_depth_buffer_size.y; +void EffectsRD::gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID> p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, ssao.gather_uniform_set, 0); + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) && !p_adaptive_base_pass) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, ssao.importance_map_uniform_set, 1); + } + + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + ssao.gather_push_constant.pass_coord_offset[0] = i % 2; + ssao.gather_push_constant.pass_coord_offset[1] = i / 2; + ssao.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.screen_size.x; + ssao.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.screen_size.y; + ssao.gather_push_constant.pass = i; + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2); + RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); + + int x_groups = ((p_settings.screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + int y_groups = ((p_settings.screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, 1); + } + RD::get_singleton()->compute_list_add_barrier(p_compute_list); +} + +void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao, const Vector<RID> p_ao_slices, RID p_ao_pong, const Vector<RID> p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); /* FIRST PASS */ - // Minify the depth buffer. - - for (int i = 0; i < depth_mipmaps.size(); i++) { - if (i == 0) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_MINIFY_FIRST]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); - } else { - if (i == 1) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_MINIFY_MIPMAP]); + // Downsample and deinterleave the depth buffer. + { + if (p_invalidate_uniform_sets) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.ids.push_back(depth_mipmaps[1]); + uniforms.push_back(u); } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(depth_mipmaps[2]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.ids.push_back(depth_mipmaps[3]); + uniforms.push_back(u); + } + ssao.downsample_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.downsample_shader.version_get_shader(ssao.downsample_shader_version, 2), 2); + } + + float depth_linearize_mul = -p_projection.matrix[3][2]; + float depth_linearize_add = p_projection.matrix[2][2]; + if (depth_linearize_mul * depth_linearize_add < 0) { + depth_linearize_add = -depth_linearize_add; + } + + ssao.downsample_push_constant.orthogonal = p_projection.is_orthogonal(); + ssao.downsample_push_constant.z_near = depth_linearize_mul; + ssao.downsample_push_constant.z_far = depth_linearize_add; + if (ssao.downsample_push_constant.orthogonal) { + ssao.downsample_push_constant.z_near = p_projection.get_z_near(); + ssao.downsample_push_constant.z_far = p_projection.get_z_far(); + } + ssao.downsample_push_constant.pixel_size[0] = 1.0 / p_settings.screen_size.x; + ssao.downsample_push_constant.pixel_size[1] = 1.0 / p_settings.screen_size.y; + ssao.downsample_push_constant.radius_sq = p_settings.radius * p_settings.radius; + + int downsample_pipeline = SSAO_DOWNSAMPLE; + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + downsample_pipeline = SSAO_DOWNSAMPLE_HALF; + } else if (p_settings.quality > RS::ENV_SSAO_QUALITY_MEDIUM) { + downsample_pipeline = SSAO_DOWNSAMPLE_MIPMAP; + } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[i - 1]), 0); + if (p_settings.half_size) { + downsample_pipeline++; } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[i]), 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.minify_push_constant, sizeof(SSAOMinifyPushConstant)); - // shrink after set - ssao.minify_push_constant.source_size[0] = MAX(1, ssao.minify_push_constant.source_size[0] >> 1); - ssao.minify_push_constant.source_size[1] = MAX(1, ssao.minify_push_constant.source_size[1] >> 1); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[downsample_pipeline]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[0]), 1); + if (p_settings.quality > RS::ENV_SSAO_QUALITY_MEDIUM) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.downsample_uniform_set, 2); + } + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample_push_constant, sizeof(SSAODownsamplePushConstant)); - int x_groups = (ssao.minify_push_constant.source_size[0] - 1) / 8 + 1; - int y_groups = (ssao.minify_push_constant.source_size[1] - 1) / 8 + 1; + int x_groups = (MAX(1, p_settings.screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + int y_groups = (MAX(1, p_settings.screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); } /* SECOND PASS */ - // Gather samples - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[(SSAO_GATHER_LOW + p_quality) + (p_half_size ? 4 : 0)]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao1), 1); - if (!p_half_size) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 2); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_normal_buffer), 3); - - ssao.gather_push_constant.screen_size[0] = p_depth_buffer_size.x; - ssao.gather_push_constant.screen_size[1] = p_depth_buffer_size.y; - if (p_half_size) { - ssao.gather_push_constant.screen_size[0] >>= 1; - ssao.gather_push_constant.screen_size[1] >>= 1; - } - ssao.gather_push_constant.z_far = p_projection.get_z_far(); - ssao.gather_push_constant.z_near = p_projection.get_z_near(); - ssao.gather_push_constant.orthogonal = p_projection.is_orthogonal(); - - ssao.gather_push_constant.proj_info[0] = -2.0f / (ssao.gather_push_constant.screen_size[0] * p_projection.matrix[0][0]); - ssao.gather_push_constant.proj_info[1] = -2.0f / (ssao.gather_push_constant.screen_size[1] * p_projection.matrix[1][1]); - ssao.gather_push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - ssao.gather_push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - //ssao.gather_push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - //ssao.gather_push_constant.proj_info[3] = -(1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - - ssao.gather_push_constant.radius = p_radius; - - ssao.gather_push_constant.proj_scale = float(p_projection.get_pixels_per_meter(ssao.gather_push_constant.screen_size[0])); - ssao.gather_push_constant.bias = p_bias; - ssao.gather_push_constant.intensity_div_r6 = p_intensity / pow(p_radius, 6.0f); - - ssao.gather_push_constant.pixel_size[0] = 1.0 / p_depth_buffer_size.x; - ssao.gather_push_constant.pixel_size[1] = 1.0 / p_depth_buffer_size.y; - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); + // Sample SSAO + { + ssao.gather_push_constant.screen_size[0] = p_settings.screen_size.x; + ssao.gather_push_constant.screen_size[1] = p_settings.screen_size.y; + + ssao.gather_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; + ssao.gather_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; + float tan_half_fov_x = 1.0 / p_projection.matrix[0][0]; + float tan_half_fov_y = 1.0 / p_projection.matrix[1][1]; + ssao.gather_push_constant.NDC_to_view_mul[0] = tan_half_fov_x * 2.0; + ssao.gather_push_constant.NDC_to_view_mul[1] = tan_half_fov_y * -2.0; + ssao.gather_push_constant.NDC_to_view_add[0] = tan_half_fov_x * -1.0; + ssao.gather_push_constant.NDC_to_view_add[1] = tan_half_fov_y; + ssao.gather_push_constant.is_orthogonal = p_projection.is_orthogonal(); + + ssao.gather_push_constant.half_screen_pixel_size_x025[0] = ssao.gather_push_constant.half_screen_pixel_size[0] * 0.25; + ssao.gather_push_constant.half_screen_pixel_size_x025[1] = ssao.gather_push_constant.half_screen_pixel_size[1] * 0.25; + + float radius_near_limit = (p_settings.radius * 1.2f); + if (p_settings.quality <= RS::ENV_SSAO_QUALITY_LOW) { + radius_near_limit *= 1.50f; + + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + ssao.gather_push_constant.radius *= 0.8f; + } + if (p_settings.half_size) { + ssao.gather_push_constant.radius *= 0.5f; + } + } + radius_near_limit /= tan_half_fov_y; + ssao.gather_push_constant.radius = p_settings.radius; + ssao.gather_push_constant.intensity = p_settings.intensity; + ssao.gather_push_constant.shadow_power = p_settings.power; + ssao.gather_push_constant.shadow_clamp = 0.98; + ssao.gather_push_constant.fade_out_mul = -1.0 / (p_settings.fadeout_to - p_settings.fadeout_from); + ssao.gather_push_constant.fade_out_add = p_settings.fadeout_from / (p_settings.fadeout_to - p_settings.fadeout_from) + 1.0; + ssao.gather_push_constant.horizon_angle_threshold = p_settings.horizon; + ssao.gather_push_constant.inv_radius_near_limit = 1.0f / radius_near_limit; + ssao.gather_push_constant.neg_inv_radius = -1.0 / ssao.gather_push_constant.radius; + + ssao.gather_push_constant.load_counter_avg_div = 9.0 / float((p_settings.quarter_size.x) * (p_settings.quarter_size.y) * 255); + ssao.gather_push_constant.adaptive_sample_limit = p_settings.adaptive_target; + + ssao.gather_push_constant.detail_intensity = p_settings.detail; + ssao.gather_push_constant.quality = MAX(0, p_settings.quality - 1); + ssao.gather_push_constant.size_multiplier = p_settings.half_size ? 2 : 1; + + if (p_invalidate_uniform_sets) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(ssao.mirror_sampler); + u.ids.push_back(p_depth_mipmaps_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.ids.push_back(p_normal_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 2; + u.ids.push_back(ssao.gather_constants_buffer); + uniforms.push_back(u); + } + ssao.gather_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 0), 0); + } - int x_groups = (ssao.gather_push_constant.screen_size[0] - 1) / 8 + 1; - int y_groups = (ssao.gather_push_constant.screen_size[1] - 1) / 8 + 1; + if (p_invalidate_uniform_sets) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.ids.push_back(p_ao_pong); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 1; + u.ids.push_back(default_sampler); + u.ids.push_back(p_importance_map); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(ssao.importance_map_load_counter); + uniforms.push_back(u); + } + ssao.importance_map_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 2), 1); + } - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); + if (p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) { + ssao.importance_map_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; + ssao.importance_map_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; + ssao.importance_map_push_constant.intensity = p_settings.intensity; + ssao.importance_map_push_constant.power = p_settings.power; + //base pass + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]); + gather_ssao(compute_list, p_ao_pong_slices, p_settings, true); + //generate importance map + int x_groups = (p_settings.quarter_size.x - 1) / 8 + 1; + int y_groups = (p_settings.quarter_size.y - 1) / 8 + 1; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + //process importance map A + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + //process Importance Map B + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map_pong), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); - /* THIRD PASS */ - // Blur horizontal - - ssao.blur_push_constant.edge_sharpness = p_edge_sharpness; - ssao.blur_push_constant.filter_scale = p_blur; - ssao.blur_push_constant.screen_size[0] = ssao.gather_push_constant.screen_size[0]; - ssao.blur_push_constant.screen_size[1] = ssao.gather_push_constant.screen_size[1]; - ssao.blur_push_constant.z_far = p_projection.get_z_far(); - ssao.blur_push_constant.z_near = p_projection.get_z_near(); - ssao.blur_push_constant.orthogonal = p_projection.is_orthogonal(); - ssao.blur_push_constant.axis[0] = 1; - ssao.blur_push_constant.axis[1] = 0; - - if (p_blur != RS::ENV_SSAO_BLUR_DISABLED) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[p_half_size ? SSAO_BLUR_PASS_HALF : SSAO_BLUR_PASS]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao1), 0); - if (p_half_size) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 1); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 1); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER]); } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao2), 3); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - - /* THIRD PASS */ - // Blur vertical - - ssao.blur_push_constant.axis[0] = 0; - ssao.blur_push_constant.axis[1] = 1; + gather_ssao(compute_list, p_ao_slices, p_settings, false); + } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao2), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao1), 3); + // /* THIRD PASS */ + // // Blur + // + { + ssao.blur_push_constant.edge_sharpness = 1.0 - p_settings.sharpness; + ssao.blur_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; + ssao.blur_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; + + int blur_passes = p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW ? p_settings.blur_passes : 1; + + for (int pass = 0; pass < blur_passes; pass++) { + int blur_pipeline = SSAO_BLUR_PASS; + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { + if (pass < blur_passes - 2) { + blur_pipeline = SSAO_BLUR_PASS_WIDE; + } + blur_pipeline = SSAO_BLUR_PASS_SMART; + } - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[blur_pipeline]); + if (pass % 2 == 0) { + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_slices[i]), 0); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ao_slices[i], ssao.mirror_sampler), 0); + } + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao_pong_slices[i]), 1); + } else { + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong_slices[i]), 0); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ao_pong_slices[i], ssao.mirror_sampler), 0); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 1); + } + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); + + int x_groups = ((p_settings.screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + int y_groups = ((p_settings.screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + } - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + } } - if (p_half_size) { //must upscale - /* FOURTH PASS */ - // upscale if half size - //back to full size - ssao.blur_push_constant.screen_size[0] = p_depth_buffer_size.x; - ssao.blur_push_constant.screen_size[1] = p_depth_buffer_size.y; + /* FOURTH PASS */ + // Interleave buffers + // back to full size + { + ssao.interleave_push_constant.inv_sharpness = 1.0 - p_settings.sharpness; + ssao.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.screen_size.x; + ssao.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.screen_size.y; + ssao.interleave_push_constant.size_modifier = uint32_t(p_settings.half_size ? 4 : 2); + + int interleave_pipeline = SSAO_INTERLEAVE_HALF; + if (p_settings.quality == RS::ENV_SSAO_QUALITY_LOW) { + interleave_pipeline = SSAO_INTERLEAVE; + } else if (p_settings.quality >= RS::ENV_SSAO_QUALITY_MEDIUM) { + interleave_pipeline = SSAO_INTERLEAVE_SMART; + } - RD::get_singleton()->compute_list_add_barrier(compute_list); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_BLUR_UPSCALE]); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[interleave_pipeline]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao1), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_upscale_buffer), 3); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_upscale_buffer), 0); + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW && p_settings.blur_passes % 2 == 0) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao), 1); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 1); + } - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); //not used but set anyway + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); - x_groups = (p_depth_buffer_size.x - 1) / 8 + 1; - y_groups = (p_depth_buffer_size.y - 1) / 8 + 1; + int x_groups = (p_settings.screen_size.x - 1) / 8 + 1; + int y_groups = (p_settings.screen_size.y - 1) / 8 + 1; RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); } RD::get_singleton()->compute_list_end(); + + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, false); } void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { @@ -1486,57 +1702,142 @@ EffectsRD::EffectsRD() { { // Initialize ssao + + RD::SamplerState sampler; + sampler.mag_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.min_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.mip_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.repeat_u = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.repeat_v = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.repeat_w = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.max_lod = 4; + + ssao.mirror_sampler = RD::get_singleton()->sampler_create(sampler); + uint32_t pipeline = 0; { Vector<String> ssao_modes; - ssao_modes.push_back("\n#define MINIFY_START\n"); ssao_modes.push_back("\n"); + ssao_modes.push_back("\n#define USE_HALF_SIZE\n"); + ssao_modes.push_back("\n#define GENERATE_MIPS\n"); + ssao_modes.push_back("\n#define GENERATE_MIPS\n#define USE_HALF_SIZE"); + ssao_modes.push_back("\n#define USE_HALF_BUFFERS\n"); + ssao_modes.push_back("\n#define USE_HALF_BUFFERS\n#define USE_HALF_SIZE"); - ssao.minify_shader.initialize(ssao_modes); + ssao.downsample_shader.initialize(ssao_modes); - ssao.minify_shader_version = ssao.minify_shader.version_create(); + ssao.downsample_shader_version = ssao.downsample_shader.version_create(); - for (int i = 0; i <= SSAO_MINIFY_MIPMAP; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.minify_shader.version_get_shader(ssao.minify_shader_version, i)); + for (int i = 0; i <= SSAO_DOWNSAMPLE_HALF_RES_HALF; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.downsample_shader.version_get_shader(ssao.downsample_shader_version, i)); pipeline++; } } { Vector<String> ssao_modes; - ssao_modes.push_back("\n#define SSAO_QUALITY_LOW\n"); + ssao_modes.push_back("\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_HIGH\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_ULTRA\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_LOW\n#define USE_HALF_SIZE\n"); - ssao_modes.push_back("\n#define USE_HALF_SIZE\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_HIGH\n#define USE_HALF_SIZE\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_ULTRA\n#define USE_HALF_SIZE\n"); + ssao_modes.push_back("\n#define SSAO_BASE\n"); + ssao_modes.push_back("\n#define ADAPTIVE\n"); ssao.gather_shader.initialize(ssao_modes); ssao.gather_shader_version = ssao.gather_shader.version_create(); - for (int i = SSAO_GATHER_LOW; i <= SSAO_GATHER_ULTRA_HALF; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.gather_shader.version_get_shader(ssao.gather_shader_version, i - SSAO_GATHER_LOW)); + for (int i = SSAO_GATHER; i <= SSAO_GATHER_ADAPTIVE; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.gather_shader.version_get_shader(ssao.gather_shader_version, i - SSAO_GATHER)); pipeline++; } + + ssao.gather_constants_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SSAOGatherConstants)); + SSAOGatherConstants gather_constants; + + const int sub_pass_count = 5; + for (int pass = 0; pass < 4; pass++) { + for (int subPass = 0; subPass < sub_pass_count; subPass++) { + int a = pass; + int b = subPass; + + int spmap[5]{ 0, 1, 4, 3, 2 }; + b = spmap[subPass]; + + float ca, sa; + float angle0 = (float(a) + float(b) / float(sub_pass_count)) * Math_PI * 0.5f; + + ca = Math::cos(angle0); + sa = Math::sin(angle0); + + float scale = 1.0f + (a - 1.5f + (b - (sub_pass_count - 1.0f) * 0.5f) / float(sub_pass_count)) * 0.07f; + + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 0] = scale * ca; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 1] = scale * -sa; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 2] = -scale * sa; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 3] = -scale * ca; + } + } + + RD::get_singleton()->buffer_update(ssao.gather_constants_buffer, 0, sizeof(SSAOGatherConstants), &gather_constants, false); } { Vector<String> ssao_modes; - ssao_modes.push_back("\n#define MODE_FULL_SIZE\n"); - ssao_modes.push_back("\n"); - ssao_modes.push_back("\n#define MODE_UPSCALE\n"); + ssao_modes.push_back("\n#define GENERATE_MAP\n"); + ssao_modes.push_back("\n#define PROCESS_MAPA\n"); + ssao_modes.push_back("\n#define PROCESS_MAPB\n"); + + ssao.importance_map_shader.initialize(ssao_modes); + + ssao.importance_map_shader_version = ssao.importance_map_shader.version_create(); + + for (int i = SSAO_GENERATE_IMPORTANCE_MAP; i <= SSAO_PROCESS_IMPORTANCE_MAPB; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, i - SSAO_GENERATE_IMPORTANCE_MAP)); + + pipeline++; + } + ssao.importance_map_load_counter = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, false); + + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(ssao.importance_map_load_counter); + uniforms.push_back(u); + } + ssao.counter_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, 2), 2); + } + { + Vector<String> ssao_modes; + ssao_modes.push_back("\n#define MODE_NON_SMART\n"); + ssao_modes.push_back("\n#define MODE_SMART\n"); + ssao_modes.push_back("\n#define MODE_WIDE\n"); ssao.blur_shader.initialize(ssao_modes); ssao.blur_shader_version = ssao.blur_shader.version_create(); - for (int i = SSAO_BLUR_PASS; i <= SSAO_BLUR_UPSCALE; i++) { + for (int i = SSAO_BLUR_PASS; i <= SSAO_BLUR_PASS_WIDE; i++) { ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.blur_shader.version_get_shader(ssao.blur_shader_version, i - SSAO_BLUR_PASS)); pipeline++; } } + { + Vector<String> ssao_modes; + ssao_modes.push_back("\n#define MODE_NON_SMART\n"); + ssao_modes.push_back("\n#define MODE_SMART\n"); + ssao_modes.push_back("\n#define MODE_HALF\n"); + + ssao.interleave_shader.initialize(ssao_modes); + + ssao.interleave_shader_version = ssao.interleave_shader.version_create(); + for (int i = SSAO_INTERLEAVE; i <= SSAO_INTERLEAVE_HALF; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.interleave_shader.version_get_shader(ssao.interleave_shader_version, i - SSAO_INTERLEAVE)); + + pipeline++; + } + } ERR_FAIL_COND(pipeline != SSAO_MAX); } @@ -1777,6 +2078,10 @@ EffectsRD::~EffectsRD() { RD::get_singleton()->free(index_buffer); //array gets freed as dependency RD::get_singleton()->free(filter.coefficient_buffer); + RD::get_singleton()->free(ssao.mirror_sampler); + RD::get_singleton()->free(ssao.gather_constants_buffer); + RD::get_singleton()->free(ssao.importance_map_load_counter); + bokeh.shader.version_free(bokeh.shader_version); copy.shader.version_free(copy.shader_version); copy_to_fb.shader.version_free(copy_to_fb.shader_version); @@ -1791,7 +2096,9 @@ EffectsRD::~EffectsRD() { specular_merge.shader.version_free(specular_merge.shader_version); ssao.blur_shader.version_free(ssao.blur_shader_version); ssao.gather_shader.version_free(ssao.gather_shader_version); - ssao.minify_shader.version_free(ssao.minify_shader_version); + ssao.downsample_shader.version_free(ssao.downsample_shader_version); + ssao.interleave_shader.version_free(ssao.interleave_shader_version); + ssao.importance_map_shader.version_free(ssao.importance_map_shader_version); ssr.shader.version_free(ssr.shader_version); ssr_filter.shader.version_free(ssr_filter.shader_version); ssr_scale.shader.version_free(ssr_scale.shader_version); diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index 3afc111b9d..ad4a660944 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -51,7 +51,9 @@ #include "servers/rendering/renderer_rd/shaders/specular_merge.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/ssao.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/ssao_blur.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssao_minify.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao_downsample.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/ssao_interleave.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/tonemap.glsl.gen.h" @@ -288,71 +290,121 @@ class EffectsRD { } bokeh; enum SSAOMode { - SSAO_MINIFY_FIRST, - SSAO_MINIFY_MIPMAP, - SSAO_GATHER_LOW, - SSAO_GATHER_MEDIUM, - SSAO_GATHER_HIGH, - SSAO_GATHER_ULTRA, - SSAO_GATHER_LOW_HALF, - SSAO_GATHER_MEDIUM_HALF, - SSAO_GATHER_HIGH_HALF, - SSAO_GATHER_ULTRA_HALF, + SSAO_DOWNSAMPLE, + SSAO_DOWNSAMPLE_HALF_RES, + SSAO_DOWNSAMPLE_MIPMAP, + SSAO_DOWNSAMPLE_MIPMAP_HALF_RES, + SSAO_DOWNSAMPLE_HALF, + SSAO_DOWNSAMPLE_HALF_RES_HALF, + SSAO_GATHER, + SSAO_GATHER_BASE, + SSAO_GATHER_ADAPTIVE, + SSAO_GENERATE_IMPORTANCE_MAP, + SSAO_PROCESS_IMPORTANCE_MAPA, + SSAO_PROCESS_IMPORTANCE_MAPB, SSAO_BLUR_PASS, - SSAO_BLUR_PASS_HALF, - SSAO_BLUR_UPSCALE, + SSAO_BLUR_PASS_SMART, + SSAO_BLUR_PASS_WIDE, + SSAO_INTERLEAVE, + SSAO_INTERLEAVE_SMART, + SSAO_INTERLEAVE_HALF, SSAO_MAX }; - struct SSAOMinifyPushConstant { + struct SSAODownsamplePushConstant { float pixel_size[2]; float z_far; float z_near; - int32_t source_size[2]; uint32_t orthogonal; - uint32_t pad; + float radius_sq; + uint32_t pad[2]; }; struct SSAOGatherPushConstant { int32_t screen_size[2]; - float z_far; - float z_near; + int pass; + int quality; + + float half_screen_pixel_size[2]; + int size_multiplier; + float detail_intensity; + + float NDC_to_view_mul[2]; + float NDC_to_view_add[2]; + + float pad[2]; + float half_screen_pixel_size_x025[2]; - uint32_t orthogonal; - float intensity_div_r6; float radius; - float bias; + float intensity; + float shadow_power; + float shadow_clamp; - float proj_info[4]; - float pixel_size[2]; - float proj_scale; - uint32_t pad; + float fade_out_mul; + float fade_out_add; + float horizon_angle_threshold; + float inv_radius_near_limit; + + bool is_orthogonal; + float neg_inv_radius; + float load_counter_avg_div; + float adaptive_sample_limit; + + int32_t pass_coord_offset[2]; + float pass_uv_offset[2]; + }; + + struct SSAOGatherConstants { + float rotation_matrices[80]; //5 vec4s * 4 + }; + + struct SSAOImportanceMapPushConstant { + float half_screen_pixel_size[2]; + float intensity; + float power; }; struct SSAOBlurPushConstant { float edge_sharpness; - int32_t filter_scale; - float z_far; - float z_near; - uint32_t orthogonal; - uint32_t pad[3]; - int32_t axis[2]; - int32_t screen_size[2]; + float pad; + float half_screen_pixel_size[2]; + }; + + struct SSAOInterleavePushConstant { + float inv_sharpness; + uint32_t size_modifier; + float pixel_size[2]; }; struct SSAO { - SSAOMinifyPushConstant minify_push_constant; - SsaoMinifyShaderRD minify_shader; - RID minify_shader_version; + SSAODownsamplePushConstant downsample_push_constant; + SsaoDownsampleShaderRD downsample_shader; + RID downsample_shader_version; + RID downsample_uniform_set; SSAOGatherPushConstant gather_push_constant; SsaoShaderRD gather_shader; RID gather_shader_version; + RID gather_uniform_set; + RID gather_constants_buffer; + bool gather_initialized = false; + + SSAOImportanceMapPushConstant importance_map_push_constant; + SsaoImportanceMapShaderRD importance_map_shader; + RID importance_map_shader_version; + RID importance_map_load_counter; + RID importance_map_uniform_set; + RID counter_uniform_set; SSAOBlurPushConstant blur_push_constant; SsaoBlurShaderRD blur_shader; RID blur_shader_version; + SSAOInterleavePushConstant interleave_push_constant; + SsaoInterleaveShaderRD interleave_shader; + RID interleave_shader_version; + + RID mirror_sampler; RID pipelines[SSAO_MAX]; } ssao; @@ -598,13 +650,27 @@ class EffectsRD { } }; + struct TextureSamplerPair { + RID texture; + RID sampler; + _FORCE_INLINE_ bool operator<(const TextureSamplerPair &p_pair) const { + if (texture == p_pair.texture) { + return sampler < p_pair.sampler; + } else { + return texture < p_pair.texture; + } + } + }; + Map<RID, RID> texture_to_compute_uniform_set_cache; Map<TexturePair, RID> texture_pair_to_compute_uniform_set_cache; Map<TexturePair, RID> image_pair_to_compute_uniform_set_cache; + Map<TextureSamplerPair, RID> texture_sampler_to_compute_uniform_set_cache; RID _get_uniform_set_from_image(RID p_texture); RID _get_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps = false); RID _get_compute_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps = false); + RID _get_compute_uniform_set_from_texture_and_sampler(RID p_texture, RID p_sampler); RID _get_compute_uniform_set_from_texture_pair(RID p_texture, RID p_texture2, bool p_use_mipmaps = false); RID _get_compute_uniform_set_from_image_pair(RID p_texture, RID p_texture2); @@ -664,9 +730,30 @@ public: Vector2i texture_size; }; + struct SSAOSettings { + float radius = 1.0; + float intensity = 2.0; + float power = 1.5; + float detail = 0.5; + float horizon = 0.06; + float sharpness = 0.98; + + RS::EnvironmentSSAOQuality quality = RS::ENV_SSAO_QUALITY_MEDIUM; + bool half_size = false; + float adaptive_target = 0.5; + int blur_passes = 2; + float fadeout_from = 50.0; + float fadeout_to = 300.0; + + Size2i screen_size = Size2i(); + Size2i half_screen_size = Size2i(); + Size2i quarter_size = Size2i(); + }; + void tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings); - void generate_ssao(RID p_depth_buffer, RID p_normal_buffer, const Size2i &p_depth_buffer_size, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao1, bool p_half_size, RID p_ao2, RID p_upscale_buffer, float p_intensity, float p_radius, float p_bias, const CameraMatrix &p_projection, RS::EnvironmentSSAOQuality p_quality, RS::EnvironmentSSAOBlur p_blur, float p_edge_sharpness); + void gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID> p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass); + void generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao, const Vector<RID> p_ao_slices, RID p_ao_pong, const Vector<RID> p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets); void roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve); void cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size); diff --git a/servers/rendering/renderer_rd/light_cluster_builder.cpp b/servers/rendering/renderer_rd/light_cluster_builder.cpp index b76b41ba26..bb807ca4ca 100644 --- a/servers/rendering/renderer_rd/light_cluster_builder.cpp +++ b/servers/rendering/renderer_rd/light_cluster_builder.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/light_cluster_builder.h b/servers/rendering/renderer_rd/light_cluster_builder.h index 0767a96817..8f77ece6f5 100644 --- a/servers/rendering/renderer_rd/light_cluster_builder.h +++ b/servers/rendering/renderer_rd/light_cluster_builder.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/pipeline_cache_rd.cpp b/servers/rendering/renderer_rd/pipeline_cache_rd.cpp index 8319e3eed1..b2b919c40e 100644 --- a/servers/rendering/renderer_rd/pipeline_cache_rd.cpp +++ b/servers/rendering/renderer_rd/pipeline_cache_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/pipeline_cache_rd.h b/servers/rendering/renderer_rd/pipeline_cache_rd.h index 2f91c3c3b5..b1c8f21ecc 100644 --- a/servers/rendering/renderer_rd/pipeline_cache_rd.h +++ b/servers/rendering/renderer_rd/pipeline_cache_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp index 8fa56b182c..508d56cfab 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -601,10 +601,10 @@ void RendererCanvasRenderRD::_render_item(RD::DrawListID p_draw_list, const Item push_constant.flags |= FLAGS_NINEPACH_DRAW_CENTER; } - push_constant.ninepatch_margins[0] = np->margin[MARGIN_LEFT]; - push_constant.ninepatch_margins[1] = np->margin[MARGIN_TOP]; - push_constant.ninepatch_margins[2] = np->margin[MARGIN_RIGHT]; - push_constant.ninepatch_margins[3] = np->margin[MARGIN_BOTTOM]; + push_constant.ninepatch_margins[0] = np->margin[SIDE_LEFT]; + push_constant.ninepatch_margins[1] = np->margin[SIDE_TOP]; + push_constant.ninepatch_margins[2] = np->margin[SIDE_RIGHT]; + push_constant.ninepatch_margins[3] = np->margin[SIDE_BOTTOM]; RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); RD::get_singleton()->draw_list_bind_index_array(p_draw_list, shader.quad_index_array); @@ -1689,7 +1689,7 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh to_light_xform[2] = from_pos; to_light_xform[1] = light_dir; - to_light_xform[0] = -light_dir.tangent(); + to_light_xform[0] = -light_dir.orthogonal(); to_light_xform.invert(); @@ -2488,8 +2488,8 @@ RendererCanvasRenderRD::RendererCanvasRenderRD(RendererStorageRD *p_storage) { actions.renames["COLOR"] = "color"; actions.renames["NORMAL"] = "normal"; - actions.renames["NORMALMAP"] = "normal_map"; - actions.renames["NORMALMAP_DEPTH"] = "normal_depth"; + actions.renames["NORMAL_MAP"] = "normal_map"; + actions.renames["NORMAL_MAP_DEPTH"] = "normal_depth"; actions.renames["TEXTURE"] = "color_texture"; actions.renames["TEXTURE_PIXEL_SIZE"] = "draw_data.color_texture_pixel_size"; actions.renames["NORMAL_TEXTURE"] = "normal_texture"; @@ -2517,7 +2517,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD(RendererStorageRD *p_storage) { actions.usage_defines["SCREEN_UV"] = "#define SCREEN_UV_USED\n"; actions.usage_defines["SCREEN_PIXEL_SIZE"] = "@SCREEN_UV"; actions.usage_defines["NORMAL"] = "#define NORMAL_USED\n"; - actions.usage_defines["NORMALMAP"] = "#define NORMALMAP_USED\n"; + actions.usage_defines["NORMAL_MAP"] = "#define NORMAL_MAP_USED\n"; actions.usage_defines["LIGHT"] = "#define LIGHT_SHADER_CODE_USED\n"; actions.render_mode_defines["skip_vertex_transform"] = "#define SKIP_TRANSFORM_USED\n"; diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h index 203d7a4890..545eeaa106 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp index 4ae7e68219..fb9c114ade 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index 877f47d702..e1995872af 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp index 5412688e3f..6881d7913f 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -112,7 +112,7 @@ void RendererSceneRenderForward::ShaderData::set_code(const String &p_code) { actions.usage_flag_pointers["TIME"] = &uses_time; actions.usage_flag_pointers["ROUGHNESS"] = &uses_roughness; actions.usage_flag_pointers["NORMAL"] = &uses_normal; - actions.usage_flag_pointers["NORMALMAP"] = &uses_normal; + actions.usage_flag_pointers["NORMAL_MAP"] = &uses_normal; actions.usage_flag_pointers["POINT_SIZE"] = &uses_point_size; actions.usage_flag_pointers["POINT_COORD"] = &uses_point_size; @@ -766,7 +766,7 @@ void RendererSceneRenderForward::_allocate_normal_roughness_texture(RenderBuffer tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; tf.width = rb->width; tf.height = rb->height; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; @@ -782,7 +782,7 @@ void RendererSceneRenderForward::_allocate_normal_roughness_texture(RenderBuffer fb.push_back(rb->normal_roughness_buffer); rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb); } else { - tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; tf.samples = rb->texture_samples; rb->normal_roughness_buffer_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); @@ -894,7 +894,7 @@ void RendererSceneRenderForward::_fill_instances(RenderList::Element **p_element } else { id.gi_offset = 0xFFFFFFFF; } - } else if (!e->instance->lightmap_sh.empty()) { + } else if (!e->instance->lightmap_sh.is_empty()) { if (lightmap_captures_used < scene_state.max_lightmap_captures) { const Color *src_capture = e->instance->lightmap_sh.ptr(); LightmapCaptureData &lcd = scene_state.lightmap_captures[lightmap_captures_used]; @@ -914,7 +914,7 @@ void RendererSceneRenderForward::_fill_instances(RenderList::Element **p_element id.flags |= INSTANCE_DATA_FLAG_USE_GI_BUFFERS; } - if (!low_end && !e->instance->gi_probe_instances.empty()) { + if (!low_end && !e->instance->gi_probe_instances.is_empty()) { uint32_t written = 0; for (int j = 0; j < e->instance->gi_probe_instances.size(); j++) { RID probe = e->instance->gi_probe_instances[j]; @@ -953,7 +953,7 @@ void RendererSceneRenderForward::_fill_instances(RenderList::Element **p_element /// RENDERING /// -void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderList::Element **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe, const Vector2 &p_uv_offset) { +void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderList::Element **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe, const Vector2 &p_uv_offset, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { RD::DrawListID draw_list = p_draw_list; RD::FramebufferFormatID framebuffer_format = p_framebuffer_Format; @@ -996,10 +996,13 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw //find primitive and vertex format RS::PrimitiveType primitive; + void *mesh_surface = nullptr; switch (e->instance->base_type) { case RS::INSTANCE_MESH: { - primitive = storage->mesh_surface_get_primitive(e->instance->base, e->surface_index); + mesh_surface = storage->mesh_get_surface(e->instance->base, e->surface_index); + + primitive = storage->mesh_surface_get_primitive(mesh_surface); if (e->instance->skeleton.is_valid()) { xforms_uniform_set = storage->skeleton_get_3d_uniform_set(e->instance->skeleton, default_shader_rd, TRANSFORMS_UNIFORM_SET); } @@ -1007,7 +1010,10 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw case RS::INSTANCE_MULTIMESH: { RID mesh = storage->multimesh_get_mesh(e->instance->base); ERR_CONTINUE(!mesh.is_valid()); //should be a bug - primitive = storage->mesh_surface_get_primitive(mesh, e->surface_index); + + mesh_surface = storage->mesh_get_surface(e->instance->base, e->surface_index); + + primitive = storage->mesh_surface_get_primitive(mesh_surface); xforms_uniform_set = storage->multimesh_get_3d_uniform_set(e->instance->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); @@ -1018,7 +1024,10 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw case RS::INSTANCE_PARTICLES: { RID mesh = storage->particles_get_draw_pass_mesh(e->instance->base, e->surface_index >> 16); ERR_CONTINUE(!mesh.is_valid()); //should be a bug - primitive = storage->mesh_surface_get_primitive(mesh, e->surface_index & 0xFFFF); + + mesh_surface = storage->mesh_get_surface(e->instance->base, e->surface_index & 0xFFFF); + + primitive = storage->mesh_surface_get_primitive(mesh_surface); xforms_uniform_set = storage->particles_get_instance_buffer_uniform_set(e->instance->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); @@ -1077,25 +1086,39 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw RID vertex_array_rd; RID index_array_rd; - switch (e->instance->base_type) { - case RS::INSTANCE_MESH: { - storage->mesh_surface_get_arrays_and_format(e->instance->base, e->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, index_array_rd, vertex_format); - } break; - case RS::INSTANCE_MULTIMESH: { - RID mesh = storage->multimesh_get_mesh(e->instance->base); - ERR_CONTINUE(!mesh.is_valid()); //should be a bug - storage->mesh_surface_get_arrays_and_format(mesh, e->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, index_array_rd, vertex_format); - } break; - case RS::INSTANCE_IMMEDIATE: { - ERR_CONTINUE(true); //should be a bug - } break; - case RS::INSTANCE_PARTICLES: { - RID mesh = storage->particles_get_draw_pass_mesh(e->instance->base, e->surface_index >> 16); - ERR_CONTINUE(!mesh.is_valid()); //should be a bug - storage->mesh_surface_get_arrays_and_format(mesh, e->surface_index & 0xFFFF, pipeline->get_vertex_input_mask(), vertex_array_rd, index_array_rd, vertex_format); - } break; - default: { - ERR_CONTINUE(true); //should be a bug + if (mesh_surface) { + if (e->instance->mesh_instance.is_valid()) { //skeleton and blend shape + storage->mesh_instance_surface_get_vertex_arrays_and_format(e->instance->mesh_instance, e->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); + } else { + storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); + } + + if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) { + Vector3 support_min = e->instance->transformed_aabb.get_support(-p_lod_plane.normal); + Vector3 support_max = e->instance->transformed_aabb.get_support(p_lod_plane.normal); + + float distance_min = p_lod_plane.distance_to(support_min); + float distance_max = p_lod_plane.distance_to(support_max); + + float distance = 0.0; + + if (distance_min * distance_max < 0.0) { + //crossing plane + distance = 0.0; + } else if (distance_min >= 0.0) { + distance = distance_min; + } else if (distance_max <= 0.0) { + distance = -distance_max; + } + + Vector3 model_scale_vec = e->instance->transform.basis.get_scale_abs(); + + float model_scale = MAX(model_scale_vec.x, MAX(model_scale_vec.y, model_scale_vec.z)); + + index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, model_scale * e->instance->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold); + + } else { + index_array_rd = storage->mesh_surface_get_index_array(mesh_surface); } } @@ -1498,7 +1521,7 @@ void RendererSceneRenderForward::_add_geometry_with_material(InstanceBase *p_ins e->geometry_index = p_geometry_index; e->material_index = e->material->index; e->uses_instancing = e->instance->base_type == RS::INSTANCE_MULTIMESH; - e->uses_lightmap = e->instance->lightmap != nullptr || !e->instance->lightmap_sh.empty(); + e->uses_lightmap = e->instance->lightmap != nullptr || !e->instance->lightmap_sh.is_empty(); e->uses_forward_gi = has_alpha && (e->instance->gi_probe_instances.size() || p_using_sdfgi); e->shader_index = e->shader_index; e->depth_layer = e->instance->depth_layer; @@ -1509,7 +1532,7 @@ void RendererSceneRenderForward::_add_geometry_with_material(InstanceBase *p_ins } } -void RendererSceneRenderForward::_fill_render_list(InstanceBase **p_cull_result, int p_cull_count, PassMode p_pass_mode, bool p_using_sdfgi) { +void RendererSceneRenderForward::_fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi) { scene_state.current_shader_index = 0; scene_state.current_material_index = 0; scene_state.used_sss = false; @@ -1517,12 +1540,19 @@ void RendererSceneRenderForward::_fill_render_list(InstanceBase **p_cull_result, scene_state.used_normal_texture = false; scene_state.used_depth_texture = false; + Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); + near_plane.d += p_cam_projection.get_z_near(); + float z_max = p_cam_projection.get_z_far() - p_cam_projection.get_z_near(); + uint32_t geometry_index = 0; //fill list - for (int i = 0; i < p_cull_count; i++) { - InstanceBase *inst = p_cull_result[i]; + for (int i = 0; i < (int)p_instances.size(); i++) { + InstanceBase *inst = p_instances[i]; + + inst->depth = near_plane.distance_to(inst->transform.origin); + inst->depth_layer = CLAMP(int(inst->depth * 16 / z_max), 0, 15); //add geometry for drawing switch (inst->base_type) { @@ -1612,14 +1642,14 @@ void RendererSceneRenderForward::_fill_render_list(InstanceBase **p_cull_result, } } -void RendererSceneRenderForward::_setup_lightmaps(InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, const Transform &p_cam_transform) { +void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<InstanceBase *> &p_lightmaps, const Transform &p_cam_transform) { uint32_t lightmaps_used = 0; - for (int i = 0; i < p_lightmap_cull_count; i++) { + for (int i = 0; i < (int)p_lightmaps.size(); i++) { if (i >= (int)scene_state.max_lightmaps) { break; } - InstanceBase *lm = p_lightmap_cull_result[i]; + InstanceBase *lm = p_lightmaps[i]; Basis to_lm = lm->transform.basis.inverse() * p_cam_transform.basis; to_lm = to_lm.inverse().transposed(); //will transform normals RendererStorageRD::store_transform_3x3(to_lm, scene_state.lightmaps[i].normal_xform); @@ -1631,7 +1661,7 @@ void RendererSceneRenderForward::_setup_lightmaps(InstanceBase **p_lightmap_cull } } -void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, int p_directional_light_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color) { +void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { RenderBufferDataForward *render_buffer = nullptr; if (p_render_buffer.is_valid()) { render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer); @@ -1650,6 +1680,13 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf scene_state.ubo.reflection_multiplier = 1.0; } + float lod_distance_multiplier = p_cam_projection.get_lod_multiplier(); + Plane lod_camera_plane(p_cam_transform.get_origin(), -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { + p_screen_lod_threshold = 0.0; + } + //scene_state.ubo.subsurface_scatter_width = subsurface_scatter_size; Vector2 vp_he = p_cam_projection.get_viewport_half_extents(); @@ -1679,7 +1716,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf opaque_framebuffer = render_buffer->color_fb; - if (!low_end && p_gi_probe_cull_count > 0) { + if (!low_end && p_gi_probes.size() > 0) { using_giprobe = true; render_buffer->ensure_gi(); } @@ -1746,13 +1783,13 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf ERR_FAIL(); //bug? } - _setup_lightmaps(p_lightmap_cull_result, p_lightmap_cull_count, p_cam_transform); + _setup_lightmaps(p_lightmaps, p_cam_transform); _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); _update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example) render_list.clear(); - _fill_render_list(p_cull_result, p_cull_count, PASS_MODE_COLOR, using_sdfgi); + _fill_render_list(p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi); bool using_sss = !low_end && render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED; @@ -1834,8 +1871,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf clear_color = p_default_bg_color; } - RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_gi_probe_cull_result, p_gi_probe_cull_count); - render_list.sort_by_key(false); _fill_instances(render_list.elements, render_list.element_count, false, false, using_sdfgi || using_giprobe); @@ -1850,9 +1885,11 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf if (depth_pre_pass) { //depth pre pass RENDER_TIMESTAMP("Render Depth Pre-Pass"); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); + bool finish_depth = using_ssao || using_sdfgi || using_giprobe; RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(depth_framebuffer), render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(depth_framebuffer), render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); RD::get_singleton()->draw_list_end(); if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { @@ -1873,13 +1910,15 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } if (using_sdfgi || using_giprobe) { - _process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->ambient_buffer, render_buffer->reflection_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probe_cull_result, p_gi_probe_cull_count); + _process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->ambient_buffer, render_buffer->reflection_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probes); } _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); RENDER_TIMESTAMP("Render Opaque Pass"); + RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_gi_probes); + bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss; bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss; @@ -1900,7 +1939,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer; RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(framebuffer), render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(framebuffer), render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); RD::get_singleton()->draw_list_end(); if (will_continue_color && using_separate_specular) { @@ -1919,8 +1958,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf dc.set_depth_correction(true); CameraMatrix cm = (dc * p_cam_projection) * CameraMatrix(p_cam_transform.affine_inverse()); RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(opaque_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); - for (int i = 0; i < p_gi_probe_cull_count; i++) { - _debug_giprobe(p_gi_probe_cull_result[i], draw_list, opaque_framebuffer, cm, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION, 1.0); + for (int i = 0; i < (int)p_gi_probes.size(); i++) { + _debug_giprobe(p_gi_probes[i], draw_list, opaque_framebuffer, cm, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION, 1.0); } RD::get_singleton()->draw_list_end(); } @@ -1988,7 +2027,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf { RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(alpha_framebuffer), &render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(alpha_framebuffer), &render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); RD::get_singleton()->draw_list_end(); } @@ -1997,7 +2036,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } } -void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, InstanceBase **p_cull_result, int p_cull_count, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake) { +void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedArray<InstanceBase *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { RENDER_TIMESTAMP("Setup Rendering Shadow"); _update_render_base_uniform_set(); @@ -2008,13 +2047,17 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, InstanceBase _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_zfar, false, p_use_pancake); + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { + p_screen_lod_threshold = 0.0; + } + render_list.clear(); PassMode pass_mode = p_use_dp ? PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW; - _fill_render_list(p_cull_result, p_cull_count, pass_mode); + _fill_render_list(p_instances, pass_mode, p_projection, p_transform); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), nullptr, 0); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Shadow"); @@ -2025,12 +2068,12 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, InstanceBase { //regular forward for now RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, p_use_dp_flip, pass_mode, true, rp_uniform_set); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, p_use_dp_flip, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); RD::get_singleton()->draw_list_end(); } } -void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, InstanceBase **p_cull_result, int p_cull_count) { +void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<InstanceBase *> &p_instances) { RENDER_TIMESTAMP("Setup Render Collider Heightfield"); _update_render_base_uniform_set(); @@ -2045,9 +2088,9 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, PassMode pass_mode = PASS_MODE_SHADOW; - _fill_render_list(p_cull_result, p_cull_count, pass_mode); + _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), nullptr, 0); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Collider Heightield"); @@ -2063,7 +2106,7 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, } } -void RendererSceneRenderForward::_render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) { +void RendererSceneRenderForward::_render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering Material"); _update_render_base_uniform_set(); @@ -2078,9 +2121,9 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo render_list.clear(); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - _fill_render_list(p_cull_result, p_cull_count, pass_mode); + _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), nullptr, 0); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); @@ -2102,7 +2145,7 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo } } -void RendererSceneRenderForward::_render_uv2(InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) { +void RendererSceneRenderForward::_render_uv2(const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering UV2"); _update_render_base_uniform_set(); @@ -2117,9 +2160,9 @@ void RendererSceneRenderForward::_render_uv2(InstanceBase **p_cull_result, int p render_list.clear(); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - _fill_render_list(p_cull_result, p_cull_count, pass_mode); + _fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform()); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), nullptr, 0); + RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); @@ -2163,7 +2206,7 @@ void RendererSceneRenderForward::_render_uv2(InstanceBase **p_cull_result, int p } } -void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, InstanceBase **p_cull_result, int p_cull_count, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { +void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<InstanceBase *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { RENDER_TIMESTAMP("Render SDFGI"); _update_render_base_uniform_set(); @@ -2175,7 +2218,7 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto render_list.clear(); PassMode pass_mode = PASS_MODE_SDF; - _fill_render_list(p_cull_result, p_cull_count, pass_mode); + _fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform()); render_list.sort_by_key(false); _fill_instances(render_list.elements, render_list.element_count, true); @@ -2419,7 +2462,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } } -RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count) { +RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes) { if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { RD::get_singleton()->free(render_pass_uniform_set); } @@ -2482,8 +2525,8 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff u.ids.resize(MAX_GI_PROBES); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); for (int i = 0; i < MAX_GI_PROBES; i++) { - if (i < p_gi_probe_cull_count) { - RID tex = gi_probe_instance_get_texture(p_gi_probe_cull_result[i]); + if (i < (int)p_gi_probes.size()) { + RID tex = gi_probe_instance_get_texture(p_gi_probes[i]); if (!tex.is_valid()) { tex = default_tex; } @@ -2500,7 +2543,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff RD::Uniform u; u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - RID texture = false && rb && rb->depth.is_valid() ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); + RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); uniforms.push_back(u); } @@ -2578,7 +2621,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff RD::Uniform u; u.binding = 12; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.ids.push_back(rb ? render_buffers_get_default_gi_probe_buffer() : render_buffers_get_gi_probe_buffer(p_render_buffers)); + u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer()); uniforms.push_back(u); } { @@ -2684,7 +2727,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed uniforms.push_back(u); } - sdfgi_pass_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); + sdfgi_pass_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_sdfgi_rd, RENDER_PASS_UNIFORM_SET); return sdfgi_pass_uniform_set; } @@ -2826,8 +2869,8 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor actions.renames["FRAGCOORD"] = "gl_FragCoord"; actions.renames["FRONT_FACING"] = "gl_FrontFacing"; - actions.renames["NORMALMAP"] = "normalmap"; - actions.renames["NORMALMAP_DEPTH"] = "normaldepth"; + actions.renames["NORMAL_MAP"] = "normal_map"; + actions.renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; actions.renames["ALBEDO"] = "albedo"; actions.renames["ALPHA"] = "alpha"; actions.renames["METALLIC"] = "metallic"; @@ -2875,6 +2918,7 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor actions.renames["DIFFUSE_LIGHT"] = "diffuse_light"; actions.renames["SPECULAR_LIGHT"] = "specular_light"; + actions.usage_defines["NORMAL"] = "#define NORMAL_USED\n"; actions.usage_defines["TANGENT"] = "#define TANGENT_USED\n"; actions.usage_defines["BINORMAL"] = "@TANGENT"; actions.usage_defines["RIM"] = "#define LIGHT_RIM_USED\n"; @@ -2893,8 +2937,8 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor actions.usage_defines["CUSTOM1"] = "#define CUSTOM1\n"; actions.usage_defines["CUSTOM2"] = "#define CUSTOM2\n"; actions.usage_defines["CUSTOM3"] = "#define CUSTOM3\n"; - actions.usage_defines["NORMALMAP"] = "#define NORMALMAP_USED\n"; - actions.usage_defines["NORMALMAP_DEPTH"] = "@NORMALMAP"; + actions.usage_defines["NORMAL_MAP"] = "#define NORMAL_MAP_USED\n"; + actions.usage_defines["NORMAL_MAP_DEPTH"] = "@NORMAL_MAP"; actions.usage_defines["COLOR"] = "#define COLOR_USED\n"; actions.usage_defines["INSTANCE_CUSTOM"] = "#define ENABLE_INSTANCE_CUSTOM\n"; actions.usage_defines["POSITION"] = "#define OVERRIDE_POSITION\n"; diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h index 6d76d5f0eb..4b37f4a391 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -266,7 +266,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { void _update_render_base_uniform_set(); RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture); - RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count); + RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes); struct LightmapData { float normal_xform[12]; @@ -567,26 +567,26 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { }; void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false); - void _setup_lightmaps(InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, const Transform &p_cam_transform); + void _setup_lightmaps(const PagedArray<InstanceBase *> &p_lightmaps, const Transform &p_cam_transform); void _fill_instances(RenderList::Element **p_elements, int p_element_count, bool p_for_depth, bool p_has_sdfgi = false, bool p_has_opaque_gi = false); - void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderList::Element **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2()); + void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderList::Element **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0); _FORCE_INLINE_ void _add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false); _FORCE_INLINE_ void _add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false); - void _fill_render_list(InstanceBase **p_cull_result, int p_cull_count, PassMode p_pass_mode, bool p_using_sdfgi = false); + void _fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false); Map<Size2i, RID> sdfgi_framebuffer_size_cache; bool low_end = false; protected: - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, int p_directional_light_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color); - virtual void _render_shadow(RID p_framebuffer, InstanceBase **p_cull_result, int p_cull_count, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake); - virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region); - virtual void _render_uv2(InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region); - virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, InstanceBase **p_cull_result, int p_cull_count, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); - virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, InstanceBase **p_cull_result, int p_cull_count); + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); + virtual void _render_shadow(RID p_framebuffer, const PagedArray<InstanceBase *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0); + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + virtual void _render_uv2(const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region); + virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<InstanceBase *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); + virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<InstanceBase *> &p_instances); public: virtual void set_time(double p_time, double p_step); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index f880eb7d8a..8e55dea2b1 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -1153,7 +1153,7 @@ void RendererSceneRenderRD::_sdfgi_update_cascades(RID p_render_buffers) { RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, true); } -void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_environment, const RID *p_directional_light_instances, uint32_t p_directional_light_count, const RID *p_positional_light_instances, uint32_t p_positional_light_count) { +void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); if (rb->sdfgi == nullptr) { @@ -1179,12 +1179,12 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; uint32_t idx = 0; - for (uint32_t j = 0; j < p_directional_light_count; j++) { + for (uint32_t j = 0; j < (uint32_t)p_directional_lights.size(); j++) { if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { break; } - LightInstance *li = light_instance_owner.getornull(p_directional_light_instances[j]); + LightInstance *li = light_instance_owner.getornull(p_directional_lights[j]); ERR_CONTINUE(!li); if (storage->light_directional_is_sky_only(li->light)) { @@ -1402,7 +1402,7 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi RENDER_TIMESTAMP("<SDFGI Update Probes"); } -void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transform &p_transform, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, uint32_t &r_gi_probes_used) { +void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used) { r_gi_probes_used = 0; RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); @@ -1417,8 +1417,8 @@ void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transfor for (int i = 0; i < RenderBuffers::MAX_GIPROBES; i++) { RID texture; - if (i < p_gi_probe_cull_count) { - GIProbeInstance *gipi = gi_probe_instance_owner.getornull(p_gi_probe_cull_result[i]); + if (i < (int)p_gi_probes.size()) { + GIProbeInstance *gipi = gi_probe_instance_owner.getornull(p_gi_probes[i]); if (gipi) { texture = gipi->texture; @@ -1489,12 +1489,12 @@ void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transfor } } - if (p_gi_probe_cull_count > 0) { - RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN(RenderBuffers::MAX_GIPROBES, p_gi_probe_cull_count), gi_probe_data, true); + if (p_gi_probes.size() > 0) { + RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data, true); } } -void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count) { +void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { RENDER_TIMESTAMP("Render GI"); RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); @@ -1512,7 +1512,7 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - push_constant.max_giprobes = MIN(RenderBuffers::MAX_GIPROBES, p_gi_probe_cull_count); + push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; push_constant.use_sdfgi = rb->sdfgi != nullptr; @@ -3122,7 +3122,7 @@ RS::EnvironmentSSRRoughnessQuality RendererSceneRenderRD::environment_get_ssr_ro return ssr_roughness_quality; } -void RendererSceneRenderRD::environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness) { +void RendererSceneRenderRD::environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_power, float p_detail, float p_horizon, float p_sharpness, float p_light_affect, float p_ao_channel_affect) { Environment *env = environment_owner.getornull(p_env); ERR_FAIL_COND(!env); @@ -3133,15 +3133,21 @@ void RendererSceneRenderRD::environment_set_ssao(RID p_env, bool p_enable, float env->ssao_enabled = p_enable; env->ssao_radius = p_radius; env->ssao_intensity = p_intensity; - env->ssao_bias = p_bias; + env->ssao_power = p_power; + env->ssao_detail = p_detail; + env->ssao_horizon = p_horizon; + env->ssao_sharpness = p_sharpness; env->ssao_direct_light_affect = p_light_affect; env->ssao_ao_channel_affect = p_ao_channel_affect; - env->ssao_blur = p_blur; } -void RendererSceneRenderRD::environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size) { +void RendererSceneRenderRD::environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size, float p_adaptive_target, int p_blur_passes, float p_fadeout_from, float p_fadeout_to) { ssao_quality = p_quality; ssao_half_size = p_half_size; + ssao_adaptive_target = p_adaptive_target; + ssao_blur_passes = p_blur_passes; + ssao_fadeout_from = p_fadeout_from; + ssao_fadeout_to = p_fadeout_to; } bool RendererSceneRenderRD::environment_is_ssao_enabled(RID p_env) const { @@ -3255,6 +3261,13 @@ void RendererSceneRenderRD::reflection_atlas_set_size(RID p_ref_atlas, int p_ref } } +int RendererSceneRenderRD::reflection_atlas_get_size(RID p_ref_atlas) const { + ReflectionAtlas *ra = reflection_atlas_owner.getornull(p_ref_atlas); + ERR_FAIL_COND_V(!ra, 0); + + return ra->size; +} + //////////////////////// RID RendererSceneRenderRD::reflection_probe_instance_create(RID p_probe) { ReflectionProbeInstance rpi; @@ -4048,7 +4061,7 @@ bool RendererSceneRenderRD::gi_probe_needs_update(RID p_probe) const { return gi_probe->last_probe_version != storage->gi_probe_get_version(gi_probe->probe); } -void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, int p_dynamic_object_count, InstanceBase **p_dynamic_objects) { +void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<InstanceBase *> &p_dynamic_objects) { GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); ERR_FAIL_COND(!gi_probe); @@ -4407,7 +4420,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins uint32_t light_count = 0; - if (p_update_light_instances || p_dynamic_object_count > 0) { + if (p_update_light_instances || p_dynamic_objects.size() > 0) { light_count = MIN(gi_probe_max_lights, (uint32_t)p_light_instances.size()); { @@ -4457,7 +4470,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins } } - if (gi_probe->has_dynamic_object_data || p_update_light_instances || p_dynamic_object_count) { + if (gi_probe->has_dynamic_object_data || p_update_light_instances || p_dynamic_objects.size()) { // PROCESS MIPMAPS if (gi_probe->mipmaps.size()) { //can update mipmaps @@ -4550,7 +4563,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins gi_probe->has_dynamic_object_data = false; //clear until dynamic object data is used again - if (p_dynamic_object_count && gi_probe->dynamic_maps.size()) { + if (p_dynamic_objects.size() && gi_probe->dynamic_maps.size()) { Vector3i octree_size = storage->gi_probe_get_octree_size(gi_probe->probe); int multiplier = gi_probe->dynamic_maps[0].size / MAX(MAX(octree_size.x, octree_size.y), octree_size.z); @@ -4564,7 +4577,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins AABB probe_aabb(Vector3(), octree_size); //this could probably be better parallelized in compute.. - for (int i = 0; i < p_dynamic_object_count; i++) { + for (int i = 0; i < (int)p_dynamic_objects.size(); i++) { InstanceBase *instance = p_dynamic_objects[i]; //not used, so clear instance->depth_layer = 0; @@ -4635,7 +4648,12 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins CameraMatrix cm; cm.set_orthogonal(-rect.size.width / 2, rect.size.width / 2, -rect.size.height / 2, rect.size.height / 2, 0.0001, aabb.size[z_axis]); - _render_material(to_world_xform * xform, cm, true, &instance, 1, gi_probe->dynamic_maps[0].fb, Rect2i(Vector2i(), rect.size)); + if (cull_argument.size() == 0) { + cull_argument.push_back(nullptr); + } + cull_argument[0] = instance; + + _render_material(to_world_xform * xform, cm, true, cull_argument, gi_probe->dynamic_maps[0].fb, Rect2i(Vector2i(), rect.size)); GIProbeDynamicPushConstant push_constant; zeromem(&push_constant, sizeof(GIProbeDynamicPushConstant)); @@ -5074,21 +5092,24 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { rb->luminance.current = RID(); } - if (rb->ssao.ao[0].is_valid()) { + if (rb->ssao.depth.is_valid()) { RD::get_singleton()->free(rb->ssao.depth); - RD::get_singleton()->free(rb->ssao.ao[0]); - if (rb->ssao.ao[1].is_valid()) { - RD::get_singleton()->free(rb->ssao.ao[1]); - } - if (rb->ssao.ao_full.is_valid()) { - RD::get_singleton()->free(rb->ssao.ao_full); - } + RD::get_singleton()->free(rb->ssao.ao_deinterleaved); + RD::get_singleton()->free(rb->ssao.ao_pong); + RD::get_singleton()->free(rb->ssao.ao_final); + + RD::get_singleton()->free(rb->ssao.importance_map[0]); + RD::get_singleton()->free(rb->ssao.importance_map[1]); rb->ssao.depth = RID(); - rb->ssao.ao[0] = RID(); - rb->ssao.ao[1] = RID(); - rb->ssao.ao_full = RID(); + rb->ssao.ao_deinterleaved = RID(); + rb->ssao.ao_pong = RID(); + rb->ssao.ao_final = RID(); + rb->ssao.importance_map[0] = RID(); + rb->ssao.importance_map[1] = RID(); rb->ssao.depth_slices.clear(); + rb->ssao.ao_deinterleaved_slices.clear(); + rb->ssao.ao_pong_slices.clear(); } if (rb->ssr.blur_radius[0].is_valid()) { @@ -5187,64 +5208,132 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen RENDER_TIMESTAMP("Process SSAO"); - if (rb->ssao.ao[0].is_valid() && rb->ssao.ao_full.is_valid() != ssao_half_size) { + if (rb->ssao.ao_final.is_valid() && ssao_using_half_size != ssao_half_size) { RD::get_singleton()->free(rb->ssao.depth); - RD::get_singleton()->free(rb->ssao.ao[0]); - if (rb->ssao.ao[1].is_valid()) { - RD::get_singleton()->free(rb->ssao.ao[1]); - } - if (rb->ssao.ao_full.is_valid()) { - RD::get_singleton()->free(rb->ssao.ao_full); - } + RD::get_singleton()->free(rb->ssao.ao_deinterleaved); + RD::get_singleton()->free(rb->ssao.ao_pong); + RD::get_singleton()->free(rb->ssao.ao_final); + + RD::get_singleton()->free(rb->ssao.importance_map[0]); + RD::get_singleton()->free(rb->ssao.importance_map[1]); rb->ssao.depth = RID(); - rb->ssao.ao[0] = RID(); - rb->ssao.ao[1] = RID(); - rb->ssao.ao_full = RID(); + rb->ssao.ao_deinterleaved = RID(); + rb->ssao.ao_pong = RID(); + rb->ssao.ao_final = RID(); + rb->ssao.importance_map[0] = RID(); + rb->ssao.importance_map[1] = RID(); rb->ssao.depth_slices.clear(); + rb->ssao.ao_deinterleaved_slices.clear(); + rb->ssao.ao_pong_slices.clear(); + } + + int buffer_width; + int buffer_height; + int half_width; + int half_height; + if (ssao_half_size) { + buffer_width = (rb->width + 3) / 4; + buffer_height = (rb->height + 3) / 4; + half_width = (rb->width + 7) / 8; + half_height = (rb->height + 7) / 8; + } else { + buffer_width = (rb->width + 1) / 2; + buffer_height = (rb->height + 1) / 2; + half_width = (rb->width + 3) / 4; + half_height = (rb->height + 3) / 4; } - - if (!rb->ssao.ao[0].is_valid()) { + bool uniform_sets_are_invalid = false; + if (rb->ssao.depth.is_null()) { //allocate depth slices { RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; - tf.width = rb->width / 2; - tf.height = rb->height / 2; - tf.mipmaps = Image::get_image_required_mipmaps(tf.width, tf.height, Image::FORMAT_RF) + 1; + tf.format = RD::DATA_FORMAT_R16_SFLOAT; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = buffer_width; + tf.height = buffer_height; + tf.mipmaps = 4; + tf.array_layers = 4; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; rb->ssao.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); for (uint32_t i = 0; i < tf.mipmaps; i++) { - RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.depth, 0, i); + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.depth, 0, i, RD::TEXTURE_SLICE_2D_ARRAY); rb->ssao.depth_slices.push_back(slice); } } { RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = ssao_half_size ? rb->width / 2 : rb->width; - tf.height = ssao_half_size ? rb->height / 2 : rb->height; + tf.format = RD::DATA_FORMAT_R8G8_UNORM; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = buffer_width; + tf.height = buffer_height; + tf.array_layers = 4; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ssao.ao[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssao.ao[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.ao_deinterleaved = RD::get_singleton()->texture_create(tf, RD::TextureView()); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.ao_deinterleaved, i, 0); + rb->ssao.ao_deinterleaved_slices.push_back(slice); + } } - if (ssao_half_size) { - //upsample texture + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8_UNORM; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = buffer_width; + tf.height = buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.ao_pong = RD::get_singleton()->texture_create(tf, RD::TextureView()); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.ao_pong, i, 0); + rb->ssao.ao_pong_slices.push_back(slice); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = half_width; + tf.height = half_height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.importance_map[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.importance_map[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + { RD::TextureFormat tf; tf.format = RD::DATA_FORMAT_R8_UNORM; tf.width = rb->width; tf.height = rb->height; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ssao.ao_full = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.ao_final = RD::get_singleton()->texture_create(tf, RD::TextureView()); + _render_buffers_uniform_set_changed(p_render_buffers); } - - _render_buffers_uniform_set_changed(p_render_buffers); + ssao_using_half_size = ssao_half_size; + uniform_sets_are_invalid = true; } - storage->get_effects()->generate_ssao(rb->depth_texture, p_normal_buffer, Size2i(rb->width, rb->height), rb->ssao.depth, rb->ssao.depth_slices, rb->ssao.ao[0], rb->ssao.ao_full.is_valid(), rb->ssao.ao[1], rb->ssao.ao_full, env->ssao_intensity, env->ssao_radius, env->ssao_bias, p_projection, ssao_quality, env->ssao_blur, env->ssao_blur_edge_sharpness); + EffectsRD::SSAOSettings settings; + settings.radius = env->ssao_radius; + settings.intensity = env->ssao_intensity; + settings.power = env->ssao_power; + settings.detail = env->ssao_detail; + settings.horizon = env->ssao_horizon; + settings.sharpness = env->ssao_sharpness; + + settings.quality = ssao_quality; + settings.half_size = ssao_half_size; + settings.adaptive_target = ssao_adaptive_target; + settings.blur_passes = ssao_blur_passes; + settings.fadeout_from = ssao_fadeout_from; + settings.fadeout_to = ssao_fadeout_to; + settings.screen_size = Size2i(rb->width, rb->height); + settings.half_screen_size = Size2i(buffer_width, buffer_height); + settings.quarter_size = Size2i(half_width, half_height); + + storage->get_effects()->generate_ssao(rb->depth_texture, p_normal_buffer, rb->ssao.depth, rb->ssao.depth_slices, rb->ssao.ao_deinterleaved, rb->ssao.ao_deinterleaved_slices, rb->ssao.ao_pong, rb->ssao.ao_pong_slices, rb->ssao.ao_final, rb->ssao.importance_map[0], rb->ssao.importance_map[1], p_projection, settings, uniform_sets_are_invalid); } void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(RID p_render_buffers, RID p_environment, RID p_camera_effects, const CameraMatrix &p_projection) { @@ -5424,9 +5513,9 @@ void RendererSceneRenderRD::_render_buffers_debug_draw(RID p_render_buffers, RID } } - if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SSAO && rb->ssao.ao[0].is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SSAO && rb->ssao.ao_final.is_valid()) { Size2 rtsize = storage->render_target_get_size(rb->render_target); - RID ao_buf = rb->ssao.ao_full.is_valid() ? rb->ssao.ao_full : rb->ssao.ao[0]; + RID ao_buf = rb->ssao.ao_final; effects->copy_to_fb_rect(ao_buf, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, true); } @@ -5614,7 +5703,7 @@ RID RendererSceneRenderRD::render_buffers_get_ao_texture(RID p_render_buffers) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND_V(!rb, RID()); - return rb->ssao.ao_full.is_valid() ? rb->ssao.ao_full : rb->ssao.ao[0]; + return rb->ssao.ao_final; } RID RendererSceneRenderRD::render_buffers_get_gi_probe_buffer(RID p_render_buffers) { @@ -5916,11 +6005,11 @@ RendererSceneRenderRD::RenderBufferData *RendererSceneRenderRD::render_buffers_g return rb->data; } -void RendererSceneRenderRD::_setup_reflections(RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, const Transform &p_camera_inverse_transform, RID p_environment) { - for (int i = 0; i < p_reflection_probe_cull_count; i++) { - RID rpi = p_reflection_probe_cull_result[i]; +void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment) { + for (uint32_t i = 0; i < (uint32_t)p_reflections.size(); i++) { + RID rpi = p_reflections[i]; - if (i >= (int)cluster.max_reflections) { + if (i >= cluster.max_reflections) { reflection_probe_instance_set_render_index(rpi, 0); //invalid, but something needs to be set continue; } @@ -5971,19 +6060,19 @@ void RendererSceneRenderRD::_setup_reflections(RID *p_reflection_probe_cull_resu reflection_probe_instance_set_render_pass(rpi, RSG::rasterizer->get_frame_number()); } - if (p_reflection_probe_cull_count) { - RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, MIN(cluster.max_reflections, (unsigned int)p_reflection_probe_cull_count) * sizeof(ReflectionData), cluster.reflections, true); + if (p_reflections.size()) { + RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, MIN(cluster.max_reflections, (unsigned int)p_reflections.size()) * sizeof(ReflectionData), cluster.reflections, true); } } -void RendererSceneRenderRD::_setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) { +void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count) { uint32_t light_count = 0; r_directional_light_count = 0; r_positional_light_count = 0; sky_scene_state.ubo.directional_light_count = 0; - for (int i = 0; i < p_light_cull_count; i++) { - RID li = p_light_cull_result[i]; + for (int i = 0; i < (int)p_lights.size(); i++) { + RID li = p_lights[i]; RID base = light_instance_get_base_light(li); ERR_CONTINUE(base.is_null()); @@ -6336,15 +6425,15 @@ void RendererSceneRenderRD::_setup_lights(RID *p_light_cull_result, int p_light_ } } -void RendererSceneRenderRD::_setup_decals(const RID *p_decal_instances, int p_decal_count, const Transform &p_camera_inverse_xform) { +void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform) { Transform uv_xform; uv_xform.basis.scale(Vector3(2.0, 1.0, 2.0)); uv_xform.origin = Vector3(-1.0, 0.0, -1.0); - p_decal_count = MIN((uint32_t)p_decal_count, cluster.max_decals); + uint32_t decal_count = MIN((uint32_t)p_decals.size(), cluster.max_decals); int idx = 0; - for (int i = 0; i < p_decal_count; i++) { - RID di = p_decal_instances[i]; + for (uint32_t i = 0; i < decal_count; i++) { + RID di = p_decals[i]; RID decal = decal_instance_get_base(di); Transform xform = decal_instance_get_transform(di); @@ -6592,7 +6681,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e //update directional shadow if (p_use_directional_shadows) { - if (directional_shadow.shrink_stages.empty()) { + if (directional_shadow.shrink_stages.is_empty()) { if (rb->volumetric_fog->uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { //invalidate uniform set, we will need a new one RD::get_singleton()->free(rb->volumetric_fog->uniform_set); @@ -6627,7 +6716,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e bool force_shrink_shadows = false; - if (shadow_atlas->shrink_stages.empty()) { + if (shadow_atlas->shrink_stages.is_empty()) { if (rb->volumetric_fog->uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { //invalidate uniform set, we will need a new one RD::get_singleton()->free(rb->volumetric_fog->uniform_set); @@ -7012,7 +7101,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::get_singleton()->compute_list_end(); } -void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID *p_light_cull_result, int p_light_cull_count, RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, RID *p_decal_cull_result, int p_decal_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass) { +void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) { Color clear_color; if (p_render_buffers.is_valid()) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); @@ -7023,17 +7112,23 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & } //assign render indices to giprobes - for (int i = 0; i < p_gi_probe_cull_count; i++) { - GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probe_cull_result[i]); + for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { + GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probes[i]); if (giprobe_inst) { giprobe_inst->render_index = i; } } + const PagedArray<RID> *lights = &p_lights; + const PagedArray<RID> *reflections = &p_reflection_probes; + const PagedArray<RID> *gi_probes = &p_gi_probes; + + PagedArray<RID> empty; + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { - p_light_cull_count = 0; - p_reflection_probe_cull_count = 0; - p_gi_probe_cull_count = 0; + lights = ∅ + reflections = ∅ + gi_probes = ∅ } cluster.builder.begin(p_cam_transform.affine_inverse(), p_cam_projection); //prepare cluster @@ -7046,17 +7141,17 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & } } else { //do not render reflections when rendering a reflection probe - _setup_reflections(p_reflection_probe_cull_result, p_reflection_probe_cull_count, p_cam_transform.affine_inverse(), p_environment); + _setup_reflections(*reflections, p_cam_transform.affine_inverse(), p_environment); } uint32_t directional_light_count = 0; uint32_t positional_light_count = 0; - _setup_lights(p_light_cull_result, p_light_cull_count, p_cam_transform.affine_inverse(), p_shadow_atlas, using_shadows, directional_light_count, positional_light_count); - _setup_decals(p_decal_cull_result, p_decal_cull_count, p_cam_transform.affine_inverse()); + _setup_lights(*lights, p_cam_transform.affine_inverse(), p_shadow_atlas, using_shadows, directional_light_count, positional_light_count); + _setup_decals(p_decals, p_cam_transform.affine_inverse()); cluster.builder.bake_cluster(); //bake to cluster uint32_t gi_probe_count = 0; - _setup_giprobes(p_render_buffers, p_cam_transform, p_gi_probe_cull_result, p_gi_probe_cull_count, gi_probe_count); + _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count); if (p_render_buffers.is_valid()) { bool directional_shadows = false; @@ -7069,7 +7164,7 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & _update_volumetric_fog(p_render_buffers, p_environment, p_cam_projection, p_cam_transform, p_shadow_atlas, directional_light_count, directional_shadows, positional_light_count, gi_probe_count); } - _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_cull_result, p_cull_count, directional_light_count, p_gi_probe_cull_result, p_gi_probe_cull_count, p_lightmap_cull_result, p_lightmap_cull_count, p_environment, p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color); + _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); if (p_render_buffers.is_valid()) { RENDER_TIMESTAMP("Tonemap"); @@ -7082,7 +7177,7 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & } } -void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, InstanceBase **p_cull_result, int p_cull_count) { +void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<InstanceBase *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { LightInstance *light_instance = light_instance_owner.getornull(p_light); ERR_FAIL_COND(!light_instance); @@ -7233,7 +7328,7 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p if (render_cubemap) { //rendering to cubemap - _render_shadow(render_fb, p_cull_result, p_cull_count, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake); + _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); if (finalize_cubemap) { //reblit atlas_rect.size.height /= 2; @@ -7244,7 +7339,7 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p } else { //render shadow - _render_shadow(render_fb, p_cull_result, p_cull_count, light_projection, light_transform, zfar, bias, normal_bias, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake); + _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, bias, normal_bias, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); //copy to atlas if (use_linear_depth) { @@ -7258,11 +7353,11 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p } } -void RendererSceneRenderRD::render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) { - _render_material(p_cam_transform, p_cam_projection, p_cam_ortogonal, p_cull_result, p_cull_count, p_framebuffer, p_region); +void RendererSceneRenderRD::render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { + _render_material(p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, p_framebuffer, p_region); } -void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, InstanceBase **p_cull_result, int p_cull_count) { +void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<InstanceBase *> &p_instances) { //print_line("rendering region " + itos(p_region)); RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); @@ -7285,7 +7380,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, Ins } //print_line("rendering cascade " + itos(p_region) + " objects: " + itos(p_cull_count) + " bounds: " + bounds + " from: " + from + " size: " + size + " cell size: " + rtos(rb->sdfgi->cascades[cascade].cell_size)); - _render_sdfgi(p_render_buffers, from, size, bounds, p_cull_result, p_cull_count, rb->sdfgi->render_albedo, rb->sdfgi->render_emission, rb->sdfgi->render_emission_aniso, rb->sdfgi->render_geom_facing); + _render_sdfgi(p_render_buffers, from, size, bounds, p_instances, rb->sdfgi->render_albedo, rb->sdfgi->render_emission, rb->sdfgi->render_emission_aniso, rb->sdfgi->render_geom_facing); if (cascade_next != cascade) { RENDER_TIMESTAMP(">SDFGI Update SDF"); @@ -7599,7 +7694,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, Ins } } -void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, InstanceBase **p_cull_result, int p_cull_count) { +void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<InstanceBase *> &p_instances) { ERR_FAIL_COND(!storage->particles_collision_is_heightfield(p_collider)); Vector3 extents = storage->particles_collision_get_extents(p_collider) * p_transform.basis.get_scale(); CameraMatrix cm; @@ -7613,16 +7708,14 @@ void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, RID fb = storage->particles_collision_get_heightfield_framebuffer(p_collider); - _render_particle_collider_heightfield(fb, cam_xform, cm, p_cull_result, p_cull_count); + _render_particle_collider_heightfield(fb, cam_xform, cm, p_instances); } -void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const RID **p_positional_light_cull_result, const uint32_t *p_positional_light_cull_count) { +void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); ERR_FAIL_COND(!rb->sdfgi); - ERR_FAIL_COND(p_positional_light_cull_count == 0); - _sdfgi_update_cascades(p_render_buffers); //need cascades updated for this RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -7658,7 +7751,7 @@ void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uin int idx = 0; - for (uint32_t j = 0; j < p_positional_light_cull_count[i]; j++) { + for (uint32_t j = 0; j < (uint32_t)p_positional_light_cull_result[i].size(); j++) { if (idx == SDFGI::MAX_STATIC_LIGHTS) { break; } @@ -7897,8 +7990,11 @@ TypedArray<Image> RendererSceneRenderRD::bake_render_uv2(RID p_base, const Vecto } } - InstanceBase *cull = &ins; - _render_uv2(&cull, 1, fb, Rect2i(0, 0, p_image_size.width, p_image_size.height)); + if (cull_argument.size() == 0) { + cull_argument.push_back(nullptr); + } + cull_argument[0] = &ins; + _render_uv2(cull_argument, fb, Rect2i(0, 0, p_image_size.width, p_image_size.height)); TypedArray<Image> ret; @@ -8388,7 +8484,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { cluster.lights_instances = memnew_arr(RID, cluster.max_lights); cluster.lights_shadow_rect_cache = memnew_arr(Rect2i, cluster.max_lights); - cluster.max_directional_lights = 8; + cluster.max_directional_lights = MAX_DIRECTIONAL_LIGHTS; uint32_t directional_light_buffer_size = cluster.max_directional_lights * sizeof(Cluster::DirectionalLightData); cluster.directional_lights = memnew_arr(Cluster::DirectionalLightData, cluster.max_directional_lights); cluster.directional_light_buffer = RD::get_singleton()->uniform_buffer_create(directional_light_buffer_size); @@ -8428,7 +8524,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { camera_effects_set_dof_blur_bokeh_shape(RS::DOFBokehShape(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_shape")))); camera_effects_set_dof_blur_quality(RS::DOFBlurQuality(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_quality"))), GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_use_jitter")); - environment_set_ssao_quality(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/quality/ssao/quality"))), GLOBAL_GET("rendering/quality/ssao/half_size")); + environment_set_ssao_quality(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/quality/ssao/quality"))), GLOBAL_GET("rendering/quality/ssao/half_size"), GLOBAL_GET("rendering/quality/ssao/adaptive_target"), GLOBAL_GET("rendering/quality/ssao/blur_passes"), GLOBAL_GET("rendering/quality/ssao/fadeout_from"), GLOBAL_GET("rendering/quality/ssao/fadeout_to")); screen_space_roughness_limiter = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_enabled"); screen_space_roughness_limiter_amount = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_amount"); screen_space_roughness_limiter_limit = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_limit"); @@ -8449,6 +8545,8 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { environment_set_volumetric_fog_filter_active(GLOBAL_GET("rendering/volumetric_fog/use_filter")); environment_set_volumetric_fog_directional_shadow_shrink_size(GLOBAL_GET("rendering/volumetric_fog/directional_shadow_shrink")); environment_set_volumetric_fog_positional_shadow_shrink_size(GLOBAL_GET("rendering/volumetric_fog/positional_shadow_shrink")); + + cull_argument.set_page_pool(&cull_argument_pool); } RendererSceneRenderRD::~RendererSceneRenderRD() { @@ -8513,4 +8611,5 @@ RendererSceneRenderRD::~RendererSceneRenderRD() { RD::get_singleton()->free(shadow_sampler); directional_shadow_atlas_set_size(0); + cull_argument.reset(); //avoid exit error } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index e3dfee2da7..af35e1b3b4 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -104,17 +104,17 @@ protected: }; virtual RenderBufferData *_create_render_buffer_data() = 0; - void _setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count); - void _setup_decals(const RID *p_decal_instances, int p_decal_count, const Transform &p_camera_inverse_xform); - void _setup_reflections(RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, const Transform &p_camera_inverse_transform, RID p_environment); - void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, uint32_t &r_gi_probes_used); + void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count); + void _setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform); + void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment); + void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used); - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, int p_directional_light_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color) = 0; - virtual void _render_shadow(RID p_framebuffer, InstanceBase **p_cull_result, int p_cull_count, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake) = 0; - virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) = 0; - virtual void _render_uv2(InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) = 0; - virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, InstanceBase **p_cull_result, int p_cull_count, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; - virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, InstanceBase **p_cull_result, int p_cull_count) = 0; + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; + virtual void _render_shadow(RID p_framebuffer, const PagedArray<InstanceBase *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) = 0; + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; + virtual void _render_uv2(const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; + virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<InstanceBase *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; + virtual void _render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<InstanceBase *> &p_instances) = 0; virtual void _debug_giprobe(RID p_gi_probe, RenderingDevice::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); void _debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform); @@ -134,8 +134,11 @@ protected: void _setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size); void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); - void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count); + void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes); + // needed for a single argument calls (material and uv2) + PagedArrayPool<InstanceBase *> cull_argument_pool; + PagedArray<InstanceBase *> cull_argument; //need this to exist private: RS::ViewportDebugDraw debug_draw = RS::VIEWPORT_DEBUG_DRAW_DISABLED; double time_step = 0; @@ -339,7 +342,7 @@ private: Vector<Reflection> reflections; }; - RID_Owner<ReflectionAtlas> reflection_atlas_owner; + mutable RID_Owner<ReflectionAtlas> reflection_atlas_owner; /* REFLECTION PROBE INSTANCE */ @@ -737,13 +740,14 @@ private: /// SSAO bool ssao_enabled = false; - float ssao_radius = 1; - float ssao_intensity = 1; - float ssao_bias = 0.01; + float ssao_radius = 1.0; + float ssao_intensity = 2.0; + float ssao_power = 1.5; + float ssao_detail = 0.5; + float ssao_horizon = 0.06; + float ssao_sharpness = 0.98; float ssao_direct_light_affect = 0.0; float ssao_ao_channel_affect = 0.0; - float ssao_blur_edge_sharpness = 4.0; - RS::EnvironmentSSAOBlur ssao_blur = RS::ENV_SSAO_BLUR_3x3; /// SSR /// @@ -777,6 +781,12 @@ private: RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; bool ssao_half_size = false; + bool ssao_using_half_size = false; + float ssao_adaptive_target = 0.5; + int ssao_blur_passes = 2; + float ssao_fadeout_from = 50.0; + float ssao_fadeout_to = 300.0; + bool glow_bicubic_upscale = false; bool glow_high_quality = false; RS::EnvironmentSSRRoughnessQuality ssr_roughness_quality = RS::ENV_SSR_ROUGNESS_QUALITY_LOW; @@ -861,8 +871,12 @@ private: struct SSAO { RID depth; Vector<RID> depth_slices; - RID ao[2]; - RID ao_full; //when using half-size + RID ao_deinterleaved; + Vector<RID> ao_deinterleaved_slices; + RID ao_pong; + Vector<RID> ao_pong_slices; + RID ao_final; + RID importance_map[2]; } ssao; struct SSR { @@ -1502,7 +1516,7 @@ public: virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const; virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const; virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const; - virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const RID *p_directional_light_instances, uint32_t p_directional_light_count, const RID *p_positional_light_instances, uint32_t p_positional_light_count); + virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count); RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; } /* SKY API */ @@ -1567,8 +1581,8 @@ public: virtual void environment_set_volumetric_fog_positional_shadow_shrink_size(int p_shrink_size); void environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance); - void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness); - void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size); + void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_power, float p_detail, float p_horizon, float p_sharpness, float p_light_affect, float p_ao_channel_affect); + void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size, float p_adaptive_target, int p_blur_passes, float p_fadeout_from, float p_fadeout_to); bool environment_is_ssao_enabled(RID p_env) const; float environment_get_ssao_ao_affect(RID p_env) const; float environment_get_ssao_light_affect(RID p_env) const; @@ -1728,6 +1742,8 @@ public: virtual RID reflection_atlas_create(); virtual void reflection_atlas_set_size(RID p_ref_atlas, int p_reflection_size, int p_reflection_count); + virtual int reflection_atlas_get_size(RID p_ref_atlas) const; + _FORCE_INLINE_ RID reflection_atlas_get_texture(RID p_ref_atlas) { ReflectionAtlas *atlas = reflection_atlas_owner.getornull(p_ref_atlas); ERR_FAIL_COND_V(!atlas, RID()); @@ -1809,7 +1825,7 @@ public: RID gi_probe_instance_create(RID p_base); void gi_probe_instance_set_transform_to_data(RID p_probe, const Transform &p_xform); bool gi_probe_needs_update(RID p_probe) const; - void gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, int p_dynamic_object_count, InstanceBase **p_dynamic_objects); + void gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<RendererSceneRender::InstanceBase *> &p_dynamic_objects); void gi_probe_set_quality(RS::GIProbeQuality p_quality) { gi_probe_quality = p_quality; } @@ -1884,16 +1900,16 @@ public: float render_buffers_get_volumetric_fog_end(RID p_render_buffers); float render_buffers_get_volumetric_fog_detail_spread(RID p_render_buffers); - void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID *p_light_cull_result, int p_light_cull_count, RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, RID *p_decal_cull_result, int p_decal_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID p_shadow_atlas, RID p_camera_effects, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass); + void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<InstanceBase *> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); - void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, InstanceBase **p_cull_result, int p_cull_count); + void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<InstanceBase *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0); - void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region); + void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<InstanceBase *> &p_instances, RID p_framebuffer, const Rect2i &p_region); - void render_sdfgi(RID p_render_buffers, int p_region, InstanceBase **p_cull_result, int p_cull_count); - void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const RID **p_positional_light_cull_result, const uint32_t *p_positional_light_cull_count); + void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<InstanceBase *> &p_instances); + void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result); - void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, InstanceBase **p_cull_result, int p_cull_count); + void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<InstanceBase *> &p_instances); virtual void set_scene_pass(uint64_t p_pass) { scene_pass = p_pass; diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index 564d61f9fb..68983da408 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -537,7 +537,7 @@ Ref<Image> RendererStorageRD::_validate_texture_format(const Ref<Image> &p_image RID RendererStorageRD::texture_2d_create(const Ref<Image> &p_image) { ERR_FAIL_COND_V(p_image.is_null(), RID()); - ERR_FAIL_COND_V(p_image->empty(), RID()); + ERR_FAIL_COND_V(p_image->is_empty(), RID()); TextureToRDFormat ret_format; Ref<Image> image = _validate_texture_format(p_image, ret_format); @@ -620,7 +620,7 @@ RID RendererStorageRD::texture_2d_layered_create(const Vector<Ref<Image>> &p_lay Image::Format valid_format = Image::FORMAT_MAX; for (int i = 0; i < p_layers.size(); i++) { - ERR_FAIL_COND_V(p_layers[i]->empty(), RID()); + ERR_FAIL_COND_V(p_layers[i]->is_empty(), RID()); if (i == 0) { valid_width = p_layers[i]->get_width(); @@ -855,7 +855,7 @@ RID RendererStorageRD::texture_proxy_create(RID p_base) { } void RendererStorageRD::_texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer, bool p_immediate) { - ERR_FAIL_COND(p_image.is_null() || p_image->empty()); + ERR_FAIL_COND(p_image.is_null() || p_image->is_empty()); Texture *tex = texture_owner.getornull(p_texture); ERR_FAIL_COND(!tex); @@ -1039,7 +1039,7 @@ Ref<Image> RendererStorageRD::texture_2d_get(RID p_texture) const { Ref<Image> image; image.instance(); image->create(tex->width, tex->height, tex->mipmaps > 1, tex->validated_format, data); - ERR_FAIL_COND_V(image->empty(), Ref<Image>()); + ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); if (tex->format != tex->validated_format) { image->convert(tex->format); } @@ -1062,7 +1062,7 @@ Ref<Image> RendererStorageRD::texture_2d_layer_get(RID p_texture, int p_layer) c Ref<Image> image; image.instance(); image->create(tex->width, tex->height, tex->mipmaps > 1, tex->validated_format, data); - ERR_FAIL_COND_V(image->empty(), Ref<Image>()); + ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); if (tex->format != tex->validated_format) { image->convert(tex->format); } @@ -1090,7 +1090,7 @@ Vector<Ref<Image>> RendererStorageRD::texture_3d_get(RID p_texture) const { Ref<Image> img; img.instance(); img->create(bs.size.width, bs.size.height, false, tex->validated_format, sub_region); - ERR_FAIL_COND_V(img->empty(), Vector<Ref<Image>>()); + ERR_FAIL_COND_V(img->is_empty(), Vector<Ref<Image>>()); if (tex->format != tex->validated_format) { img->convert(tex->format); } @@ -1234,7 +1234,7 @@ void RendererStorageRD::canvas_texture_set_channel(RID p_canvas_texture, RS::Can ct->diffuse = p_texture; } break; case RS::CANVAS_TEXTURE_CHANNEL_NORMAL: { - ct->normalmap = p_texture; + ct->normal_map = p_texture; } break; case RS::CANVAS_TEXTURE_CHANNEL_SPECULAR: { ct->specular = p_texture; @@ -1316,7 +1316,7 @@ bool RendererStorageRD::canvas_texture_get_uniform_set(RID p_texture, RS::Canvas u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.binding = 1; - t = texture_owner.getornull(ct->normalmap); + t = texture_owner.getornull(ct->normal_map); if (!t) { u.ids.push_back(texture_rd_get_default(DEFAULT_RD_TEXTURE_NORMAL)); ct->use_normal_cache = false; @@ -2216,7 +2216,7 @@ void RendererStorageRD::MaterialData::update_textures(const Map<StringName, Vari RendererStorageRD *singleton = (RendererStorageRD *)RendererStorage::base_singleton; #ifdef TOOLS_ENABLED Texture *roughness_detect_texture = nullptr; - RS::TextureDetectRoughnessChannel roughness_channel = RS::TEXTURE_DETECT_ROUGNHESS_R; + RS::TextureDetectRoughnessChannel roughness_channel = RS::TEXTURE_DETECT_ROUGHNESS_R; Texture *normal_detect_texture = nullptr; #endif @@ -2392,15 +2392,22 @@ RID RendererStorageRD::mesh_create() { return mesh_owner.make_rid(Mesh()); } +void RendererStorageRD::mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count) { + ERR_FAIL_COND(p_blend_shape_count < 0); + + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + + ERR_FAIL_COND(mesh->surface_count > 0); //surfaces already exist + + mesh->blend_shape_count = p_blend_shape_count; +} + /// Returns stride void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) { Mesh *mesh = mesh_owner.getornull(p_mesh); ERR_FAIL_COND(!mesh); - //ensure blend shape consistency - ERR_FAIL_COND(mesh->blend_shape_count && p_surface.blend_shape_count != mesh->blend_shape_count); - ERR_FAIL_COND(mesh->blend_shape_count && p_surface.bone_aabbs.size() != mesh->bone_aabbs.size()); - #ifdef DEBUG_ENABLED //do a validation, to catch errors first { @@ -2453,7 +2460,7 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su case RS::ARRAY_BONES: { //uses a separate array bool use_8 = p_surface.format & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS; - skin_stride += sizeof(int16_t) * (use_8 ? 8 : 4); + skin_stride += sizeof(int16_t) * (use_8 ? 16 : 8); } break; } } @@ -2461,6 +2468,11 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su int expected_size = stride * p_surface.vertex_count; ERR_FAIL_COND_MSG(expected_size != p_surface.vertex_data.size(), "Size of vertex data provided (" + itos(p_surface.vertex_data.size()) + ") does not match expected (" + itos(expected_size) + ")"); + + int bs_expected_size = expected_size * mesh->blend_shape_count; + + ERR_FAIL_COND_MSG(bs_expected_size != p_surface.blend_shape_data.size(), "Size of blend shape data provided (" + itos(p_surface.blend_shape_data.size()) + ") does not match expected (" + itos(bs_expected_size) + ")"); + int expected_attrib_size = attrib_stride * p_surface.vertex_count; ERR_FAIL_COND_MSG(expected_attrib_size != p_surface.attribute_data.size(), "Size of attribute data provided (" + itos(p_surface.attribute_data.size()) + ") does not match expected (" + itos(expected_attrib_size) + ")"); @@ -2477,15 +2489,25 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su s->format = p_surface.format; s->primitive = p_surface.primitive; - s->vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.vertex_data.size(), p_surface.vertex_data); + bool use_as_storage = (p_surface.skin_data.size() || mesh->blend_shape_count > 0); + + s->vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.vertex_data.size(), p_surface.vertex_data, use_as_storage); + s->vertex_buffer_size = p_surface.vertex_data.size(); + if (p_surface.attribute_data.size()) { s->attribute_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.attribute_data.size(), p_surface.attribute_data); } if (p_surface.skin_data.size()) { - s->skin_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.skin_data.size(), p_surface.skin_data); + s->skin_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.skin_data.size(), p_surface.skin_data, use_as_storage); + s->skin_buffer_size = p_surface.skin_data.size(); } + s->vertex_count = p_surface.vertex_count; + if (p_surface.format & RS::ARRAY_FORMAT_BONES) { + mesh->has_bone_weights = true; + } + if (p_surface.index_count) { bool is_index_16 = p_surface.vertex_count <= 65536; @@ -2507,22 +2529,55 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su s->aabb = p_surface.aabb; s->bone_aabbs = p_surface.bone_aabbs; //only really useful for returning them. -#if 0 - for (int i = 0; i < p_surface.blend_shapes.size(); i++) { - if (p_surface.blend_shapes[i].size() != p_surface.vertex_data.size()) { - memdelete(s); - ERR_FAIL_COND(p_surface.blend_shapes[i].size() != p_surface.vertex_data.size()); + + if (mesh->blend_shape_count > 0) { + s->blend_shape_buffer = RD::get_singleton()->storage_buffer_create(p_surface.blend_shape_data.size(), p_surface.blend_shape_data); + } + + if (use_as_storage) { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(s->vertex_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + if (s->skin_buffer.is_valid()) { + u.ids.push_back(s->skin_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + if (s->blend_shape_buffer.is_valid()) { + u.ids.push_back(s->blend_shape_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); } - RID vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.blend_shapes[i].size(), p_surface.blend_shapes[i]); - s->blend_shapes.push_back(vertex_buffer); + + s->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SURFACE); } -#endif - mesh->blend_shape_count = p_surface.blend_shape_count; if (mesh->surface_count == 0) { mesh->bone_aabbs = p_surface.bone_aabbs; mesh->aabb = p_surface.aabb; } else { + if (mesh->bone_aabbs.size() < p_surface.bone_aabbs.size()) { + // ArrayMesh::_surface_set_data only allocates bone_aabbs up to max_bone + // Each surface may affect different numbers of bones. + mesh->bone_aabbs.resize(p_surface.bone_aabbs.size()); + } for (int i = 0; i < p_surface.bone_aabbs.size(); i++) { mesh->bone_aabbs.write[i].merge_with(p_surface.bone_aabbs[i]); } @@ -2535,6 +2590,12 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su mesh->surfaces[mesh->surface_count] = s; mesh->surface_count++; + for (List<MeshInstance *>::Element *E = mesh->instances.front(); E; E = E->next()) { + //update instances + MeshInstance *mi = E->get(); + _mesh_instance_add_surface(mi, mesh, mesh->surface_count - 1); + } + mesh->instance_dependency.instance_notify_changed(true, true); mesh->material_cache.clear(); @@ -2792,16 +2853,223 @@ void RendererStorageRD::mesh_clear(RID p_mesh) { mesh->surfaces = nullptr; mesh->surface_count = 0; mesh->material_cache.clear(); + //clear instance data + for (List<MeshInstance *>::Element *E = mesh->instances.front(); E; E = E->next()) { + MeshInstance *mi = E->get(); + _mesh_instance_clear(mi); + } mesh->instance_dependency.instance_notify_changed(true, true); + mesh->has_bone_weights = false; +} + +bool RendererStorageRD::mesh_needs_instance(RID p_mesh, bool p_has_skeleton) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh, false); + + return mesh->blend_shape_count > 0 || (mesh->has_bone_weights && p_has_skeleton); +} + +/* MESH INSTANCE */ + +RID RendererStorageRD::mesh_instance_create(RID p_base) { + Mesh *mesh = mesh_owner.getornull(p_base); + ERR_FAIL_COND_V(!mesh, RID()); + + MeshInstance *mi = memnew(MeshInstance); + + mi->mesh = mesh; + + for (uint32_t i = 0; i < mesh->surface_count; i++) { + _mesh_instance_add_surface(mi, mesh, i); + } + + mi->I = mesh->instances.push_back(mi); + + mi->dirty = true; + + return mesh_instance_owner.make_rid(mi); +} +void RendererStorageRD::mesh_instance_set_skeleton(RID p_mesh_instance, RID p_skeleton) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + if (mi->skeleton == p_skeleton) { + return; + } + mi->skeleton = p_skeleton; + mi->skeleton_version = 0; + mi->dirty = true; +} + +void RendererStorageRD::mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int p_shape, float p_weight) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + ERR_FAIL_COND(!mi); + ERR_FAIL_INDEX(p_shape, (int)mi->blend_weights.size()); + mi->blend_weights[p_shape] = p_weight; + mi->weights_dirty = true; + //will be eventually updated +} + +void RendererStorageRD::_mesh_instance_clear(MeshInstance *mi) { + for (uint32_t i = 0; i < mi->surfaces.size(); i++) { + if (mi->surfaces[i].vertex_buffer.is_valid()) { + RD::get_singleton()->free(mi->surfaces[i].vertex_buffer); + } + if (mi->surfaces[i].versions) { + for (uint32_t j = 0; j < mi->surfaces[i].version_count; j++) { + RD::get_singleton()->free(mi->surfaces[i].versions[j].vertex_array); + } + memfree(mi->surfaces[i].versions); + } + } + mi->surfaces.clear(); + + if (mi->blend_weights_buffer.is_valid()) { + RD::get_singleton()->free(mi->blend_weights_buffer); + } + mi->blend_weights.clear(); + mi->weights_dirty = false; + mi->skeleton_version = 0; } -void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surface *s, uint32_t p_input_mask) { - uint32_t version = s->version_count; - s->version_count++; - s->versions = (Mesh::Surface::Version *)memrealloc(s->versions, sizeof(Mesh::Surface::Version) * s->version_count); +void RendererStorageRD::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface) { + if (mesh->blend_shape_count > 0 && mi->blend_weights_buffer.is_null()) { + mi->blend_weights.resize(mesh->blend_shape_count); + for (uint32_t i = 0; i < mi->blend_weights.size(); i++) { + mi->blend_weights[i] = 0; + } + mi->blend_weights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * mi->blend_weights.size(), mi->blend_weights.to_byte_array()); + mi->weights_dirty = true; + } - Mesh::Surface::Version &v = s->versions[version]; + MeshInstance::Surface s; + if (mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) { + //surface warrants transform + s.vertex_buffer = RD::get_singleton()->vertex_buffer_create(mesh->surfaces[p_surface]->vertex_buffer_size, Vector<uint8_t>(), true); + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(s.vertex_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + if (mi->blend_weights_buffer.is_valid()) { + u.ids.push_back(mi->blend_weights_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + s.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_INSTANCE); + } + + mi->surfaces.push_back(s); + mi->dirty = true; +} + +void RendererStorageRD::mesh_instance_check_for_update(RID p_mesh_instance) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + + bool needs_update = mi->dirty; + + if (mi->weights_dirty && !mi->weight_update_list.in_list()) { + dirty_mesh_instance_weights.add(&mi->weight_update_list); + needs_update = true; + } + + if (mi->array_update_list.in_list()) { + return; + } + + if (!needs_update && mi->skeleton.is_valid()) { + Skeleton *sk = skeleton_owner.getornull(mi->skeleton); + if (sk && sk->version != mi->skeleton_version) { + needs_update = true; + } + } + + if (needs_update) { + dirty_mesh_instance_arrays.add(&mi->array_update_list); + } +} + +void RendererStorageRD::update_mesh_instances() { + while (dirty_mesh_instance_weights.first()) { + MeshInstance *mi = dirty_mesh_instance_weights.first()->self(); + + if (mi->blend_weights_buffer.is_valid()) { + RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr(), true); + } + dirty_mesh_instance_weights.remove(&mi->weight_update_list); + mi->weights_dirty = false; + } + if (dirty_mesh_instance_arrays.first() == nullptr) { + return; //nothing to do + } + + //process skeletons and blend shapes + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + while (dirty_mesh_instance_arrays.first()) { + MeshInstance *mi = dirty_mesh_instance_arrays.first()->self(); + + Skeleton *sk = skeleton_owner.getornull(mi->skeleton); + + for (uint32_t i = 0; i < mi->surfaces.size(); i++) { + if (mi->surfaces[i].uniform_set == RID() || mi->mesh->surfaces[i]->uniform_set == RID()) { + continue; + } + + bool array_is_2d = mi->mesh->surfaces[i]->format & RS::ARRAY_FLAG_USE_2D_VERTICES; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, skeleton_shader.pipeline[array_is_2d ? SkeletonShader::SHADER_MODE_2D : SkeletonShader::SHADER_MODE_3D]); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mi->surfaces[i].uniform_set, SkeletonShader::UNIFORM_SET_INSTANCE); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mi->mesh->surfaces[i]->uniform_set, SkeletonShader::UNIFORM_SET_SURFACE); + if (sk && sk->uniform_set_mi.is_valid()) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sk->uniform_set_mi, SkeletonShader::UNIFORM_SET_SKELETON); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, skeleton_shader.default_skeleton_uniform_set, SkeletonShader::UNIFORM_SET_SKELETON); + } + + SkeletonShader::PushConstant push_constant; + + push_constant.has_normal = mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_NORMAL; + push_constant.has_tangent = mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_TANGENT; + push_constant.has_skeleton = sk != nullptr && sk->use_2d == array_is_2d && (mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_BONES); + push_constant.has_blend_shape = mi->mesh->blend_shape_count > 0; + + push_constant.vertex_count = mi->mesh->surfaces[i]->vertex_count; + push_constant.vertex_stride = (mi->mesh->surfaces[i]->vertex_buffer_size / mi->mesh->surfaces[i]->vertex_count) / 4; + push_constant.skin_stride = (mi->mesh->surfaces[i]->skin_buffer_size / mi->mesh->surfaces[i]->vertex_count) / 4; + push_constant.skin_weight_offset = (mi->mesh->surfaces[i]->format & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS) ? 4 : 2; + + push_constant.blend_shape_count = mi->mesh->blend_shape_count; + push_constant.normalized_blend_shapes = mi->mesh->blend_shape_mode == RS::BLEND_SHAPE_MODE_NORMALIZED; + push_constant.pad0 = 0; + push_constant.pad1 = 0; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant)); + + //dispatch without barrier, so all is done at the same time + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1, 64, 1, 1); + } + + mi->dirty = false; + if (sk) { + mi->skeleton_version = sk->version; + } + dirty_mesh_instance_arrays.remove(&mi->array_update_list); + } + + RD::get_singleton()->compute_list_end(); +} + +void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis) { Vector<RD::VertexAttribute> attributes; Vector<RID> buffers; @@ -2873,7 +3141,11 @@ void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surf stride += sizeof(float) * 3; } - buffer = s->vertex_buffer; + if (mis) { + buffer = mis->vertex_buffer; + } else { + buffer = s->vertex_buffer; + } } break; case RS::ARRAY_NORMAL: { @@ -2882,14 +3154,22 @@ void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surf vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; stride += sizeof(uint32_t); - buffer = s->vertex_buffer; + if (mis) { + buffer = mis->vertex_buffer; + } else { + buffer = s->vertex_buffer; + } } break; case RS::ARRAY_TANGENT: { vd.offset = stride; vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; stride += sizeof(uint32_t); - buffer = s->vertex_buffer; + if (mis) { + buffer = mis->vertex_buffer; + } else { + buffer = s->vertex_buffer; + } } break; case RS::ARRAY_COLOR: { vd.offset = attribute_stride; @@ -4847,6 +5127,7 @@ void RendererStorageRD::skeleton_allocate(RID p_skeleton, int p_bones, bool p_2d RD::get_singleton()->free(skeleton->buffer); skeleton->buffer = RID(); skeleton->data.resize(0); + skeleton->uniform_set_mi = RID(); } if (skeleton->size) { @@ -4855,6 +5136,18 @@ void RendererStorageRD::skeleton_allocate(RID p_skeleton, int p_bones, bool p_2d zeromem(skeleton->data.ptrw(), skeleton->data.size() * sizeof(float)); _skeleton_make_dirty(skeleton); + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(skeleton->buffer); + uniforms.push_back(u); + } + skeleton->uniform_set_mi = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SKELETON); + } } } @@ -4977,6 +5270,7 @@ void RendererStorageRD::_update_dirty_skeletons() { skeleton_dirty_list = skeleton->dirty_list; skeleton->instance_dependency.instance_notify_changed(true, false); + skeleton->version++; skeleton->dirty = false; skeleton->dirty_list = nullptr; @@ -5353,6 +5647,15 @@ void RendererStorageRD::reflection_probe_set_resolution(RID p_probe, int p_resol reflection_probe->resolution = p_resolution; } +void RendererStorageRD::reflection_probe_set_lod_threshold(RID p_probe, float p_ratio) { + ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND(!reflection_probe); + + reflection_probe->lod_threshold = p_ratio; + + reflection_probe->instance_dependency.instance_notify_changed(true, false); +} + AABB RendererStorageRD::reflection_probe_get_aabb(RID p_probe) const { const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); ERR_FAIL_COND_V(!reflection_probe, AABB()); @@ -5406,6 +5709,13 @@ float RendererStorageRD::reflection_probe_get_origin_max_distance(RID p_probe) c return reflection_probe->max_distance; } +float RendererStorageRD::reflection_probe_get_lod_threshold(RID p_probe) const { + const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); + ERR_FAIL_COND_V(!reflection_probe, 0); + + return reflection_probe->lod_threshold; +} + int RendererStorageRD::reflection_probe_get_resolution(RID p_probe) const { const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); ERR_FAIL_COND_V(!reflection_probe, 0); @@ -6641,7 +6951,7 @@ void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, c if (p_region == Rect2i()) { region.size = rt->size; } else { - region = Rect2i(Size2i(), rt->size).clip(p_region); + region = Rect2i(Size2i(), rt->size).intersection(p_region); if (region.size == Size2i()) { return; //nothing to do } @@ -6681,7 +6991,7 @@ void RendererStorageRD::render_target_clear_back_buffer(RID p_render_target, con if (p_region == Rect2i()) { region.size = rt->size; } else { - region = Rect2i(Size2i(), rt->size).clip(p_region); + region = Rect2i(Size2i(), rt->size).intersection(p_region); if (region.size == Size2i()) { return; //nothing to do } @@ -6702,7 +7012,7 @@ void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_targe if (p_region == Rect2i()) { region.size = rt->size; } else { - region = Rect2i(Size2i(), rt->size).clip(p_region); + region = Rect2i(Size2i(), rt->size).intersection(p_region); if (region.size == Size2i()) { return; //nothing to do } @@ -7810,7 +8120,18 @@ bool RendererStorageRD::free(RID p_rid) { mesh_clear(p_rid); Mesh *mesh = mesh_owner.getornull(p_rid); mesh->instance_dependency.instance_notify_deleted(p_rid); + if (mesh->instances.size()) { + ERR_PRINT("deleting mesh with active instances"); + } mesh_owner.free(p_rid); + } else if (mesh_instance_owner.owns(p_rid)) { + MeshInstance *mi = mesh_instance_owner.getornull(p_rid); + _mesh_instance_clear(mi); + mi->mesh->instances.erase(mi->I); + mi->I = nullptr; + mesh_instance_owner.free(p_rid); + memdelete(mi); + } else if (multimesh_owner.owns(p_rid)) { _update_dirty_multimeshes(); multimesh_allocate(p_rid, 0, RS::MULTIMESH_TRANSFORM_2D); @@ -8517,6 +8838,30 @@ RendererStorageRD::RendererStorageRD() { rt_sdf.pipelines[i] = RD::get_singleton()->compute_pipeline_create(rt_sdf.shader.version_get_shader(rt_sdf.shader_version, i)); } } + { + Vector<String> skeleton_modes; + skeleton_modes.push_back("\n#define MODE_2D\n"); + skeleton_modes.push_back(""); + + skeleton_shader.shader.initialize(skeleton_modes); + skeleton_shader.version = skeleton_shader.shader.version_create(); + for (int i = 0; i < SkeletonShader::SHADER_MODE_MAX; i++) { + skeleton_shader.version_shader[i] = skeleton_shader.shader.version_get_shader(skeleton_shader.version, i); + skeleton_shader.pipeline[i] = RD::get_singleton()->compute_pipeline_create(skeleton_shader.version_shader[i]); + } + + { + Vector<RD::Uniform> uniforms; + { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(default_rd_storage_buffer); + uniforms.push_back(u); + } + skeleton_shader.default_skeleton_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SKELETON); + } + } } RendererStorageRD::~RendererStorageRD() { @@ -8546,6 +8891,8 @@ RendererStorageRD::~RendererStorageRD() { particles_shader.copy_shader.version_free(particles_shader.copy_shader_version); rt_sdf.shader.version_free(rt_sdf.shader_version); + skeleton_shader.shader.version_free(skeleton_shader.version); + RenderingServer::get_singleton()->free(particles_shader.default_material); RenderingServer::get_singleton()->free(particles_shader.default_shader); diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h index b6a26fc9d0..6d1587185e 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.h +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -31,6 +31,8 @@ #ifndef RENDERING_SERVER_STORAGE_RD_H #define RENDERING_SERVER_STORAGE_RD_H +#include "core/templates/list.h" +#include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" #include "servers/rendering/renderer_compositor.h" #include "servers/rendering/renderer_rd/effects_rd.h" @@ -39,9 +41,9 @@ #include "servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/particles.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/particles_copy.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/skeleton.glsl.gen.h" #include "servers/rendering/renderer_scene_render.h" #include "servers/rendering/rendering_device.h" - class RendererStorageRD : public RendererStorage { public: static _FORCE_INLINE_ void store_transform(const Transform &p_mtx, float *p_array) { @@ -185,7 +187,7 @@ private: struct CanvasTexture { RID diffuse; - RID normalmap; + RID normal_map; RID specular; Color specular_color = Color(1, 1, 1, 1); float shininess = 1.0; @@ -377,6 +379,8 @@ private: /* Mesh */ + struct MeshInstance; + struct Mesh { struct Surface { RS::PrimitiveType primitive = RS::PRIMITIVE_POINTS; @@ -386,6 +390,8 @@ private: RID attribute_buffer; RID skin_buffer; uint32_t vertex_count = 0; + uint32_t vertex_buffer_size = 0; + uint32_t skin_buffer_size = 0; // A different pipeline needs to be allocated // depending on the inputs available in the @@ -433,6 +439,8 @@ private: uint32_t particles_render_index = 0; uint64_t particles_render_pass = 0; + + RID uniform_set; }; uint32_t blend_shape_count = 0; @@ -443,17 +451,90 @@ private: Vector<AABB> bone_aabbs; + bool has_bone_weights = false; + AABB aabb; AABB custom_aabb; Vector<RID> material_cache; + List<MeshInstance *> instances; + RendererStorage::InstanceDependency instance_dependency; }; mutable RID_Owner<Mesh> mesh_owner; - void _mesh_surface_generate_version_for_input_mask(Mesh::Surface *s, uint32_t p_input_mask); + struct MeshInstance { + Mesh *mesh; + RID skeleton; + struct Surface { + RID vertex_buffer; + RID uniform_set; + + Mesh::Surface::Version *versions = nullptr; //allocated on demand + uint32_t version_count = 0; + }; + LocalVector<Surface> surfaces; + LocalVector<float> blend_weights; + + RID blend_weights_buffer; + List<MeshInstance *>::Element *I = nullptr; //used to erase itself + uint64_t skeleton_version = 0; + bool dirty = false; + bool weights_dirty = false; + SelfList<MeshInstance> weight_update_list; + SelfList<MeshInstance> array_update_list; + MeshInstance() : + weight_update_list(this), array_update_list(this) {} + }; + + void _mesh_instance_clear(MeshInstance *mi); + void _mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface); + + mutable RID_PtrOwner<MeshInstance> mesh_instance_owner; + + SelfList<MeshInstance>::List dirty_mesh_instance_weights; + SelfList<MeshInstance>::List dirty_mesh_instance_arrays; + + struct SkeletonShader { + struct PushConstant { + uint32_t has_normal; + uint32_t has_tangent; + uint32_t has_skeleton; + uint32_t has_blend_shape; + + uint32_t vertex_count; + uint32_t vertex_stride; + uint32_t skin_stride; + uint32_t skin_weight_offset; + + uint32_t blend_shape_count; + uint32_t normalized_blend_shapes; + uint32_t pad0; + uint32_t pad1; + }; + + enum { + UNIFORM_SET_INSTANCE = 0, + UNIFORM_SET_SURFACE = 1, + UNIFORM_SET_SKELETON = 2, + }; + enum { + SHADER_MODE_2D, + SHADER_MODE_3D, + SHADER_MODE_MAX + }; + + SkeletonShaderRD shader; + RID version; + RID version_shader[SHADER_MODE_MAX]; + RID pipeline[SHADER_MODE_MAX]; + + RID default_skeleton_uniform_set; + } skeleton_shader; + + void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis = nullptr); RID mesh_default_rd_buffers[DEFAULT_RD_BUFFER_MAX]; @@ -826,6 +907,9 @@ private: Transform2D base_transform_2d; RID uniform_set_3d; + RID uniform_set_mi; + + uint64_t version = 1; RendererStorage::InstanceDependency instance_dependency; }; @@ -880,6 +964,7 @@ private: bool box_projection = false; bool enable_shadows = false; uint32_t cull_mask = (1 << 20) - 1; + float lod_threshold = 0.01; RendererStorage::InstanceDependency instance_dependency; }; @@ -1280,6 +1365,8 @@ public: virtual RID mesh_create(); + virtual void mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count); + /// Return stride virtual void mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface); @@ -1304,6 +1391,16 @@ public: virtual void mesh_clear(RID p_mesh); + virtual bool mesh_needs_instance(RID p_mesh, bool p_has_skeleton); + + /* MESH INSTANCE */ + + virtual RID mesh_instance_create(RID p_base); + virtual void mesh_instance_set_skeleton(RID p_mesh_instance, RID p_skeleton); + virtual void mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int p_shape, float p_weight); + virtual void mesh_instance_check_for_update(RID p_mesh_instance); + virtual void update_mesh_instances(); + _FORCE_INLINE_ const RID *mesh_get_surface_count_and_materials(RID p_mesh, uint32_t &r_surface_count) { Mesh *mesh = mesh_owner.getornull(p_mesh); ERR_FAIL_COND_V(!mesh, nullptr); @@ -1311,7 +1408,7 @@ public: if (r_surface_count == 0) { return nullptr; } - if (mesh->material_cache.empty()) { + if (mesh->material_cache.is_empty()) { mesh->material_cache.resize(mesh->surface_count); for (uint32_t i = 0; i < r_surface_count; i++) { mesh->material_cache.write[i] = mesh->surfaces[i]->material; @@ -1321,22 +1418,50 @@ public: return mesh->material_cache.ptr(); } - _FORCE_INLINE_ RS::PrimitiveType mesh_surface_get_primitive(RID p_mesh, uint32_t p_surface_index) { + _FORCE_INLINE_ void *mesh_get_surface(RID p_mesh, uint32_t p_surface_index) { Mesh *mesh = mesh_owner.getornull(p_mesh); - ERR_FAIL_COND_V(!mesh, RS::PRIMITIVE_MAX); - ERR_FAIL_UNSIGNED_INDEX_V(p_surface_index, mesh->surface_count, RS::PRIMITIVE_MAX); + ERR_FAIL_COND_V(!mesh, nullptr); + ERR_FAIL_UNSIGNED_INDEX_V(p_surface_index, mesh->surface_count, nullptr); - return mesh->surfaces[p_surface_index]->primitive; + return mesh->surfaces[p_surface_index]; } - _FORCE_INLINE_ void mesh_surface_get_arrays_and_format(RID p_mesh, uint32_t p_surface_index, uint32_t p_input_mask, RID &r_vertex_array_rd, RID &r_index_array_rd, RD::VertexFormatID &r_vertex_format) { - Mesh *mesh = mesh_owner.getornull(p_mesh); - ERR_FAIL_COND(!mesh); - ERR_FAIL_UNSIGNED_INDEX(p_surface_index, mesh->surface_count); + _FORCE_INLINE_ RS::PrimitiveType mesh_surface_get_primitive(void *p_surface) { + Mesh::Surface *surface = reinterpret_cast<Mesh::Surface *>(p_surface); + return surface->primitive; + } - Mesh::Surface *s = mesh->surfaces[p_surface_index]; + _FORCE_INLINE_ bool mesh_surface_has_lod(void *p_surface) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + return s->lod_count > 0; + } + + _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + return s->index_array; + } + + _FORCE_INLINE_ RID mesh_surface_get_index_array_with_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + int32_t current_lod = -1; + for (uint32_t i = 0; i < s->lod_count; i++) { + float screen_size = s->lods[i].edge_length * p_model_scale / p_distance_threshold; + if (screen_size > p_lod_threshold) { + break; + } + current_lod = i; + } + if (current_lod == -1) { + return s->index_array; + } else { + return s->lods[current_lod].index_array; + } + } - r_index_array_rd = s->index_array; + _FORCE_INLINE_ void mesh_surface_get_vertex_arrays_and_format(void *p_surface, uint32_t p_input_mask, RID &r_vertex_array_rd, RD::VertexFormatID &r_vertex_format) { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); s->version_lock.lock(); @@ -1353,9 +1478,11 @@ public: return; } - uint32_t version = s->version_count; //gets added at the end + uint32_t version = s->version_count; + s->version_count++; + s->versions = (Mesh::Surface::Version *)memrealloc(s->versions, sizeof(Mesh::Surface::Version) * s->version_count); - _mesh_surface_generate_version_for_input_mask(s, p_input_mask); + _mesh_surface_generate_version_for_input_mask(s->versions[version], s, p_input_mask); r_vertex_format = s->versions[version].vertex_format; r_vertex_array_rd = s->versions[version].vertex_array; @@ -1363,6 +1490,42 @@ public: s->version_lock.unlock(); } + _FORCE_INLINE_ void mesh_instance_surface_get_vertex_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint32_t p_input_mask, RID &r_vertex_array_rd, RD::VertexFormatID &r_vertex_format) { + MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance); + ERR_FAIL_COND(!mi); + Mesh *mesh = mi->mesh; + ERR_FAIL_UNSIGNED_INDEX(p_surface_index, mesh->surface_count); + + MeshInstance::Surface *mis = &mi->surfaces[p_surface_index]; + Mesh::Surface *s = mesh->surfaces[p_surface_index]; + + s->version_lock.lock(); + + //there will never be more than, at much, 3 or 4 versions, so iterating is the fastest way + + for (uint32_t i = 0; i < mis->version_count; i++) { + if (mis->versions[i].input_mask != p_input_mask) { + continue; + } + //we have this version, hooray + r_vertex_format = mis->versions[i].vertex_format; + r_vertex_array_rd = mis->versions[i].vertex_array; + s->version_lock.unlock(); + return; + } + + uint32_t version = mis->version_count; + mis->version_count++; + mis->versions = (Mesh::Surface::Version *)memrealloc(mis->versions, sizeof(Mesh::Surface::Version) * mis->version_count); + + _mesh_surface_generate_version_for_input_mask(mis->versions[version], s, p_input_mask, mis); + + r_vertex_format = mis->versions[version].vertex_format; + r_vertex_array_rd = mis->versions[version].vertex_array; + + s->version_lock.unlock(); + } + _FORCE_INLINE_ RID mesh_get_default_rd_buffer(DefaultRDBuffer p_buffer) { ERR_FAIL_INDEX_V(p_buffer, DEFAULT_RD_BUFFER_MAX, RID()); return mesh_default_rd_buffers[p_buffer]; @@ -1644,6 +1807,7 @@ public: void reflection_probe_set_enable_shadows(RID p_probe, bool p_enable); void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers); void reflection_probe_set_resolution(RID p_probe, int p_resolution); + void reflection_probe_set_lod_threshold(RID p_probe, float p_ratio); AABB reflection_probe_get_aabb(RID p_probe) const; RS::ReflectionProbeUpdateMode reflection_probe_get_update_mode(RID p_probe) const; @@ -1651,6 +1815,8 @@ public: Vector3 reflection_probe_get_extents(RID p_probe) const; Vector3 reflection_probe_get_origin_offset(RID p_probe) const; float reflection_probe_get_origin_max_distance(RID p_probe) const; + float reflection_probe_get_lod_threshold(RID p_probe) const; + int reflection_probe_get_resolution(RID p_probe) const; bool reflection_probe_renders_shadows(RID p_probe) const; diff --git a/servers/rendering/renderer_rd/shader_compiler_rd.cpp b/servers/rendering/renderer_rd/shader_compiler_rd.cpp index 2c1d2a84fd..e77141b26c 100644 --- a/servers/rendering/renderer_rd/shader_compiler_rd.cpp +++ b/servers/rendering/renderer_rd/shader_compiler_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -920,7 +920,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge if (adnode->datatype == SL::TYPE_STRUCT) { declaration += _mkid(adnode->struct_name); } else { - declaration = _prestr(adnode->precision) + _typestr(adnode->datatype); + declaration += _prestr(adnode->precision) + _typestr(adnode->datatype); } for (int i = 0; i < adnode->declarations.size(); i++) { if (i > 0) { @@ -930,7 +930,11 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge } declaration += _mkid(adnode->declarations[i].name); declaration += "["; - declaration += itos(adnode->declarations[i].size); + if (adnode->size_expression != nullptr) { + declaration += _dump_node_code(adnode->size_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); + } else { + declaration += itos(adnode->declarations[i].size); + } declaration += "]"; int sz = adnode->declarations[i].initializer.size(); if (sz > 0) { @@ -986,12 +990,13 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge if (anode->call_expression != nullptr) { code += "."; code += _dump_node_code(anode->call_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning, false); - } - - if (anode->index_expression != nullptr) { + } else if (anode->index_expression != nullptr) { code += "["; code += _dump_node_code(anode->index_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); code += "]"; + } else if (anode->assign_expression != nullptr) { + code += "="; + code += _dump_node_code(anode->assign_expression, p_level, r_gen_code, p_actions, p_default_actions, true, false); } if (anode->name == time_name) { @@ -1229,8 +1234,10 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge code += "["; code += _dump_node_code(mnode->index_expression, p_level, r_gen_code, p_actions, p_default_actions, p_assigning); code += "]"; + } else if (mnode->assign_expression != nullptr) { + code += "="; + code += _dump_node_code(mnode->assign_expression, p_level, r_gen_code, p_actions, p_default_actions, true, false); } - } break; } @@ -1333,8 +1340,8 @@ ShaderCompilerRD::ShaderCompilerRD() { actions[RS::SHADER_SPATIAL].renames["FRAGCOORD"] = "gl_FragCoord"; actions[RS::SHADER_SPATIAL].renames["FRONT_FACING"] = "gl_FrontFacing"; - actions[RS::SHADER_SPATIAL].renames["NORMALMAP"] = "normalmap"; - actions[RS::SHADER_SPATIAL].renames["NORMALMAP_DEPTH"] = "normaldepth"; + actions[RS::SHADER_SPATIAL].renames["NORMAL_MAP"] = "normal_map"; + actions[RS::SHADER_SPATIAL].renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; actions[RS::SHADER_SPATIAL].renames["ALBEDO"] = "albedo"; actions[RS::SHADER_SPATIAL].renames["ALPHA"] = "alpha"; actions[RS::SHADER_SPATIAL].renames["METALLIC"] = "metallic"; @@ -1380,8 +1387,8 @@ ShaderCompilerRD::ShaderCompilerRD() { actions[RS::SHADER_SPATIAL].usage_defines["AO_LIGHT_AFFECT"] = "#define ENABLE_AO\n"; actions[RS::SHADER_SPATIAL].usage_defines["UV"] = "#define ENABLE_UV_INTERP\n"; actions[RS::SHADER_SPATIAL].usage_defines["UV2"] = "#define ENABLE_UV2_INTERP\n"; - actions[RS::SHADER_SPATIAL].usage_defines["NORMALMAP"] = "#define ENABLE_NORMALMAP\n"; - actions[RS::SHADER_SPATIAL].usage_defines["NORMALMAP_DEPTH"] = "@NORMALMAP"; + actions[RS::SHADER_SPATIAL].usage_defines["NORMAL_MAP"] = "#define ENABLE_NORMAL_MAP\n"; + actions[RS::SHADER_SPATIAL].usage_defines["NORMAL_MAP_DEPTH"] = "@NORMAL_MAP"; actions[RS::SHADER_SPATIAL].usage_defines["COLOR"] = "#define ENABLE_COLOR_INTERP\n"; actions[RS::SHADER_SPATIAL].usage_defines["INSTANCE_CUSTOM"] = "#define ENABLE_INSTANCE_CUSTOM\n"; actions[RS::SHADER_SPATIAL].usage_defines["ALPHA_SCISSOR"] = "#define ALPHA_SCISSOR_USED\n"; diff --git a/servers/rendering/renderer_rd/shader_compiler_rd.h b/servers/rendering/renderer_rd/shader_compiler_rd.h index 694f8fff91..d127d8e01c 100644 --- a/servers/rendering/renderer_rd/shader_compiler_rd.h +++ b/servers/rendering/renderer_rd/shader_compiler_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index 41126218ae..d1f07a354f 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index 05e07d3cf3..a80d08050a 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index 1fe43b25f6..deaa9668df 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -21,8 +21,10 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("luminance_reduce.glsl") env.RD_GLSL("bokeh_dof.glsl") env.RD_GLSL("ssao.glsl") - env.RD_GLSL("ssao_minify.glsl") + env.RD_GLSL("ssao_downsample.glsl") + env.RD_GLSL("ssao_importance_map.glsl") env.RD_GLSL("ssao_blur.glsl") + env.RD_GLSL("ssao_interleave.glsl") env.RD_GLSL("roughness_limiter.glsl") env.RD_GLSL("screen_space_reflection.glsl") env.RD_GLSL("screen_space_reflection_filter.glsl") @@ -41,3 +43,4 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("particles.glsl") env.RD_GLSL("particles_copy.glsl") env.RD_GLSL("sort.glsl") + env.RD_GLSL("skeleton.glsl") diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl index 7808e7ed52..2a7cae3b4c 100644 --- a/servers/rendering/renderer_rd/shaders/canvas.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas.glsl @@ -499,7 +499,7 @@ void main() { { float normal_depth = 1.0; -#if defined(NORMALMAP_USED) +#if defined(NORMAL_MAP_USED) vec3 normal_map = vec3(0.0, 0.0, 1.0); normal_used = true; #endif @@ -510,7 +510,7 @@ FRAGMENT_SHADER_CODE /* clang-format on */ -#if defined(NORMALMAP_USED) +#if defined(NORMAL_MAP_USED) normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_depth); #endif } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index 5b01cb1f82..05f7637478 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -9,8 +9,14 @@ VERSION_DEFINES /* INPUT ATTRIBS */ layout(location = 0) in vec3 vertex_attrib; + +//only for pure render depth when normal is not used + +#ifdef NORMAL_USED layout(location = 1) in vec3 normal_attrib; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) layout(location = 2) in vec4 tangent_attrib; #endif @@ -18,7 +24,9 @@ layout(location = 2) in vec4 tangent_attrib; layout(location = 3) in vec4 color_attrib; #endif +#ifdef UV_USED layout(location = 4) in vec2 uv_attrib; +#endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) || defined(MODE_RENDER_MATERIAL) layout(location = 5) in vec2 uv2_attrib; @@ -51,19 +59,24 @@ layout(location = 11) in vec4 weight_attrib; /* Varyings */ layout(location = 0) out vec3 vertex_interp; + +#ifdef NORMAL_USED layout(location = 1) out vec3 normal_interp; +#endif #if defined(COLOR_USED) layout(location = 2) out vec4 color_interp; #endif +#ifdef UV_USED layout(location = 3) out vec2 uv_interp; +#endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) layout(location = 4) out vec2 uv2_interp; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) layout(location = 5) out vec3 tangent_interp; layout(location = 6) out vec3 binormal_interp; #endif @@ -138,9 +151,11 @@ void main() { } vec3 vertex = vertex_attrib; +#ifdef NORMAL_USED vec3 normal = normal_attrib * 2.0 - 1.0; +#endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) vec3 tangent = tangent_attrib.xyz * 2.0 - 1.0; float binormalf = tangent_attrib.a * 2.0 - 1.0; vec3 binormal = normalize(cross(normal, tangent) * binormalf); @@ -164,14 +179,17 @@ void main() { vertex = (vec4(vertex, 1.0) * m).xyz; normal = (vec4(normal, 0.0) * m).xyz; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) tangent = (vec4(tangent, 0.0) * m).xyz; binormal = (vec4(binormal, 0.0) * m).xyz; #endif } #endif + +#ifdef UV_USED uv_interp = uv_attrib; +#endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) uv2_interp = uv2_attrib; @@ -190,7 +208,7 @@ void main() { normal = world_normal_matrix * normal; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) tangent = world_normal_matrix * tangent; binormal = world_normal_matrix * binormal; @@ -215,10 +233,13 @@ VERTEX_SHADER_CODE #if !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED) vertex = (modelview * vec4(vertex, 1.0)).xyz; +#ifdef NORMAL_USED normal = modelview_normal * normal; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#endif + +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) binormal = modelview_normal * binormal; tangent = modelview_normal * tangent; @@ -230,7 +251,7 @@ VERTEX_SHADER_CODE vertex = (scene_data.inv_camera_matrix * vec4(vertex, 1.0)).xyz; normal = mat3(scene_data.inverse_normal_matrix) * normal; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) binormal = mat3(scene_data.camera_inverse_binormal_matrix) * binormal; tangent = mat3(scene_data.camera_inverse_tangent_matrix) * tangent; @@ -238,9 +259,11 @@ VERTEX_SHADER_CODE #endif vertex_interp = vertex; +#ifdef NORMAL_USED normal_interp = normal; +#endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) tangent_interp = tangent; binormal_interp = binormal; #endif @@ -250,7 +273,6 @@ VERTEX_SHADER_CODE #ifdef MODE_DUAL_PARABOLOID vertex_interp.z *= scene_data.dual_paraboloid_side; - normal_interp.z *= scene_data.dual_paraboloid_side; dp_clip = vertex_interp.z; //this attempts to avoid noise caused by objects sent to the other parabolloid side due to bias @@ -301,19 +323,24 @@ VERSION_DEFINES /* Varyings */ layout(location = 0) in vec3 vertex_interp; + +#ifdef NORMAL_USED layout(location = 1) in vec3 normal_interp; +#endif #if defined(COLOR_USED) layout(location = 2) in vec4 color_interp; #endif +#ifdef UV_USED layout(location = 3) in vec2 uv_interp; +#endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) layout(location = 4) in vec2 uv2_interp; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) layout(location = 5) in vec3 tangent_interp; layout(location = 6) in vec3 binormal_interp; #endif @@ -1792,13 +1819,15 @@ void main() { float alpha = 1.0; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) vec3 binormal = normalize(binormal_interp); vec3 tangent = normalize(tangent_interp); #else vec3 binormal = vec3(0.0); vec3 tangent = vec3(0.0); #endif + +#ifdef NORMAL_USED vec3 normal = normalize(normal_interp); #if defined(DO_SIDE_CHECK) @@ -1807,7 +1836,11 @@ void main() { } #endif +#endif //NORMAL_USED + +#ifdef UV_USED vec2 uv = uv_interp; +#endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) vec2 uv2 = uv2_interp; @@ -1817,12 +1850,12 @@ void main() { vec4 color = color_interp; #endif -#if defined(NORMALMAP_USED) +#if defined(NORMAL_MAP_USED) - vec3 normalmap = vec3(0.5); + vec3 normal_map = vec3(0.5); #endif - float normaldepth = 1.0; + float normal_depth = 1.0; vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size + scene_data.screen_pixel_size * 0.5; //account for center @@ -1893,12 +1926,12 @@ FRAGMENT_SHADER_CODE #endif // !USE_SHADOW_TO_OPACITY -#ifdef NORMALMAP_USED +#ifdef NORMAL_MAP_USED - normalmap.xy = normalmap.xy * 2.0 - 1.0; - normalmap.z = sqrt(max(0.0, 1.0 - dot(normalmap.xy, normalmap.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. + normal_map.xy = normal_map.xy * 2.0 - 1.0; + normal_map.z = sqrt(max(0.0, 1.0 - dot(normal_map.xy, normal_map.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. - normal = normalize(mix(normal, tangent * normalmap.x + binormal * normalmap.y + normal * normalmap.z, normaldepth)); + normal = normalize(mix(normal, tangent * normal_map.x + binormal * normal_map.y + normal * normal_map.z, normal_depth)); #endif @@ -1994,6 +2027,7 @@ FRAGMENT_SHADER_CODE #endif //not render depth /////////////////////// LIGHTING ////////////////////////////// +#ifdef NORMAL_USED if (scene_data.roughness_limiter_enabled) { //http://www.jp.square-enix.com/tech/library/pdf/ImprovedGeometricSpecularAA.pdf float roughness2 = roughness * roughness; @@ -2003,6 +2037,7 @@ FRAGMENT_SHADER_CODE float filteredRoughness2 = min(1.0, roughness2 + kernelRoughness2); roughness = sqrt(filteredRoughness2); } +#endif //apply energy conservation vec3 specular_light = vec3(0.0, 0.0, 0.0); diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index d18581c1b3..17ed22f58a 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -5,6 +5,12 @@ #include "cluster_data_inc.glsl" +#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) +#ifndef NORMAL_USED +#define NORMAL_USED +#endif +#endif + layout(push_constant, binding = 0, std430) uniform DrawCall { uint instance_index; uint pad; //16 bits minimum size diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl index eec0a90c0d..69d8824d8a 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl @@ -14,7 +14,7 @@ layout(local_size_x = OCT_RES, local_size_y = OCT_RES, local_size_z = 1) in; layout(rgba16f, set = 0, binding = 1) uniform restrict image2DArray irradiance_texture; layout(rg16f, set = 0, binding = 2) uniform restrict image2DArray depth_texture; -ayout(rgba32ui, set = 0, binding = 3) uniform restrict uimage2DArray irradiance_history_texture; +layout(rgba32ui, set = 0, binding = 3) uniform restrict uimage2DArray irradiance_history_texture; layout(rg32ui, set = 0, binding = 4) uniform restrict uimage2DArray depth_history_texture; struct CascadeData { diff --git a/servers/rendering/renderer_rd/shaders/skeleton.glsl b/servers/rendering/renderer_rd/shaders/skeleton.glsl new file mode 100644 index 0000000000..b19f5a9ad3 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/skeleton.glsl @@ -0,0 +1,199 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer restrict writeonly DstVertexData { + uint data[]; +} +dst_vertices; + +layout(set = 0, binding = 2, std430) buffer restrict readonly BlendShapeWeights { + float data[]; +} +blend_shape_weights; + +layout(set = 1, binding = 0, std430) buffer restrict readonly SrcVertexData { + uint data[]; +} +src_vertices; + +layout(set = 1, binding = 1, std430) buffer restrict readonly BoneWeightData { + uint data[]; +} +src_bone_weights; + +layout(set = 1, binding = 2, std430) buffer restrict readonly BlendShapeData { + uint data[]; +} +src_blend_shapes; + +layout(set = 2, binding = 0, std430) buffer restrict readonly SkeletonData { + vec4 data[]; +} +bone_transforms; + +layout(push_constant, binding = 0, std430) uniform Params { + bool has_normal; + bool has_tangent; + bool has_skeleton; + bool has_blend_shape; + + uint vertex_count; + uint vertex_stride; + uint skin_stride; + uint skin_weight_offset; + + uint blend_shape_count; + bool normalized_blend_shapes; + uint pad0; + uint pad1; +} +params; + +vec4 decode_abgr_2_10_10_10(uint base) { + uvec4 abgr_2_10_10_10 = (uvec4(base) >> uvec4(0, 10, 20, 30)) & uvec4(0x3FF, 0x3FF, 0x3FF, 0x3); + return vec4(abgr_2_10_10_10) / vec4(1023.0, 1023.0, 1023.0, 3.0) * 2.0 - 1.0; +} + +uint encode_abgr_2_10_10_10(vec4 base) { + uvec4 abgr_2_10_10_10 = uvec4(clamp(ivec4((base * 0.5 + 0.5) * vec4(1023.0, 1023.0, 1023.0, 3.0)), ivec4(0), ivec4(0x3FF, 0x3FF, 0x3FF, 0x3))) << uvec4(0, 10, 20, 30); + return abgr_2_10_10_10.x | abgr_2_10_10_10.y | abgr_2_10_10_10.z | abgr_2_10_10_10.w; +} + +void main() { + uint index = gl_GlobalInvocationID.x; + if (index >= params.vertex_count) { + return; + } + + uint src_offset = index * params.vertex_stride; + +#ifdef MODE_2D + vec2 vertex = uintBitsToFloat(uvec2(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1])); +#else + vec3 vertex; + vec3 normal; + vec4 tangent; + + vertex = uintBitsToFloat(uvec3(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1], src_vertices.data[src_offset + 2])); + + src_offset += 3; + + if (params.has_normal) { + normal = decode_abgr_2_10_10_10(src_vertices.data[src_offset]).rgb; + src_offset++; + } + + if (params.has_tangent) { + tangent = decode_abgr_2_10_10_10(src_vertices.data[src_offset]); + } + + if (params.has_blend_shape) { + float blend_total = 0.0; + vec3 blend_vertex = vec3(0.0); + vec3 blend_normal = vec3(0.0); + vec3 blend_tangent = vec3(0.0); + + for (uint i = 0; i < params.blend_shape_count; i++) { + float w = blend_shape_weights.data[i]; + if (w > 0.0001) { + uint base_offset = (params.vertex_count * i + index) * params.vertex_stride; + + blend_vertex += uintBitsToFloat(uvec3(src_blend_shapes.data[base_offset + 0], src_blend_shapes.data[base_offset + 1], src_blend_shapes.data[base_offset + 2])) * w; + + base_offset += 3; + + if (params.has_normal) { + blend_normal += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb * w; + base_offset++; + } + + if (params.has_tangent) { + blend_tangent += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb; + } + + blend_total += w; + } + } + + if (params.normalized_blend_shapes) { + vertex = (1.0 - blend_total) * vertex; + normal = (1.0 - blend_total) * normal; + tangent.rgb = (1.0 - blend_total) * tangent.rgb; + } + + vertex += blend_vertex; + normal += normalize(normal + blend_normal); + tangent.rgb += normalize(tangent.rgb + blend_tangent); + } + + if (params.has_skeleton) { + uint skin_offset = params.skin_stride * index; + + uvec2 bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + uvec2 bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset + uvec2 bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3; + + skin_offset += params.skin_weight_offset; + + uvec2 weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + + vec2 weights_01 = unpackUnorm2x16(weights.x); + vec2 weights_23 = unpackUnorm2x16(weights.y); + + mat4 m = mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x; + m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y; + m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x; + m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y; + + if (params.skin_weight_offset == 4) { + //using 8 bones/weights + skin_offset = params.skin_stride * index + 2; + + bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset + bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3; + + skin_offset += params.skin_weight_offset; + + weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]); + + weights_01 = unpackUnorm2x16(weights.x); + weights_23 = unpackUnorm2x16(weights.y); + + m += mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x; + m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y; + m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x; + m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y; + } + + //reverse order because its transposed + vertex = (vec4(vertex, 1.0) * m).xyz; + normal = normalize((vec4(normal, 0.0) * m).xyz); + tangent.xyz = normalize((vec4(tangent.xyz, 0.0) * m).xyz); + } + + uint dst_offset = index * params.vertex_stride; + + uvec3 uvertex = floatBitsToUint(vertex); + dst_vertices.data[dst_offset + 0] = uvertex.x; + dst_vertices.data[dst_offset + 1] = uvertex.y; + dst_vertices.data[dst_offset + 2] = uvertex.z; + + dst_offset += 3; + + if (params.has_normal) { + dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(vec4(normal, 0.0)); + dst_offset++; + } + + if (params.has_tangent) { + dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(tangent); + } + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/ssao.glsl index 346338181a..315ef8fa13 100644 --- a/servers/rendering/renderer_rd/shaders/ssao.glsl +++ b/servers/rendering/renderer_rd/shaders/ssao.glsl @@ -1,249 +1,486 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[compute] #version 450 VERSION_DEFINES -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 + +#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32) +const vec4 sample_pattern[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = { + vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827), + vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283), + vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055), + vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, -1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520), + vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800), + vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046), + vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), vec4(-0.91974425, 0.05403209, 0.600000, 0.062246), + vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244) +}; + +// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors +// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples) +const int num_taps[5] = { 3, 5, 12, 0, 0 }; + +#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar +#define SSAO_TILT_SAMPLES_AMOUNT (0.4) +// +#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar +#define SSAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) +// +#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) // to disable simply set to 99 or similar +#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges +// +#define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use detail; to disable simply set to 99 or similar +// +#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too +#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically +// +// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for +// testing purposes, it will not yield performance gains (or correct results) +#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) +// +#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1) + +#define SSAO_MAX_TAPS 32 +#define SSAO_MAX_REF_TAPS 512 +#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 +#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS - SSAO_ADAPTIVE_TAP_BASE_COUNT) +#define SSAO_DEPTH_MIP_LEVELS 4 -#define TWO_PI 6.283185307179586476925286766559 +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; -#ifdef SSAO_QUALITY_HIGH -#define NUM_SAMPLES (20) -#endif +layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps; +layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal; +layout(set = 0, binding = 2) uniform Constants { //get into a lower set + vec4 rotation_matrices[20]; +} +constants; -#ifdef SSAO_QUALITY_ULTRA -#define NUM_SAMPLES (48) +#ifdef ADAPTIVE +layout(rg8, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssao; +layout(set = 1, binding = 1) uniform sampler2D source_importance; +layout(set = 1, binding = 2, std430) buffer Counter { + uint sum; +} +counter; #endif -#ifdef SSAO_QUALITY_LOW -#define NUM_SAMPLES (8) -#endif +layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; -#if !defined(SSAO_QUALITY_LOW) && !defined(SSAO_QUALITY_HIGH) && !defined(SSAO_QUALITY_ULTRA) -#define NUM_SAMPLES (12) -#endif +// This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 screen_size; + int pass; + int quality; -// If using depth mip levels, the log of the maximum pixel offset before we need to switch to a lower -// miplevel to maintain reasonable spatial locality in the cache -// If this number is too small (< 3), too many taps will land in the same pixel, and we'll get bad variance that manifests as flashing. -// If it is too high (> 5), we'll get bad performance because we're not using the MIP levels effectively -#define LOG_MAX_OFFSET (3) - -// This must be less than or equal to the MAX_MIP_LEVEL defined in SSAO.cpp -#define MAX_MIP_LEVEL (4) - -// This is the number of turns around the circle that the spiral pattern makes. This should be prime to prevent -// taps from lining up. This particular choice was tuned for NUM_SAMPLES == 9 - -const int ROTATIONS[] = int[]( - 1, 1, 2, 3, 2, 5, 2, 3, 2, - 3, 3, 5, 5, 3, 4, 7, 5, 5, 7, - 9, 8, 5, 5, 7, 7, 7, 8, 5, 8, - 11, 12, 7, 10, 13, 8, 11, 8, 7, 14, - 11, 11, 13, 12, 13, 19, 17, 13, 11, 18, - 19, 11, 11, 14, 17, 21, 15, 16, 17, 18, - 13, 17, 11, 17, 19, 18, 25, 18, 19, 19, - 29, 21, 19, 27, 31, 29, 21, 18, 17, 29, - 31, 31, 23, 18, 25, 26, 25, 23, 19, 34, - 19, 27, 21, 25, 39, 29, 17, 21, 27); - -//#define NUM_SPIRAL_TURNS (7) -const int NUM_SPIRAL_TURNS = ROTATIONS[NUM_SAMPLES - 1]; - -layout(set = 0, binding = 0) uniform sampler2D source_depth_mipmaps; -layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -#ifndef USE_HALF_SIZE -layout(set = 2, binding = 0) uniform sampler2D source_depth; -#endif + vec2 half_screen_pixel_size; + int size_multiplier; + float detail_intensity; -layout(set = 3, binding = 0) uniform sampler2D source_normal; + vec2 NDC_to_view_mul; + vec2 NDC_to_view_add; -layout(push_constant, binding = 1, std430) uniform Params { - ivec2 screen_size; - float z_far; - float z_near; + vec2 pad2; + vec2 half_screen_pixel_size_x025; - bool orthogonal; - float intensity_div_r6; float radius; - float bias; - - vec4 proj_info; - vec2 pixel_size; - float proj_scale; - uint pad; + float intensity; + float shadow_power; + float shadow_clamp; + + float fade_out_mul; + float fade_out_add; + float horizon_angle_threshold; + float inv_radius_near_limit; + + bool is_orthogonal; + float neg_inv_radius; + float load_counter_avg_div; + float adaptive_sample_limit; + + ivec2 pass_coord_offset; + vec2 pass_uv_offset; } params; -vec3 reconstructCSPosition(vec2 S, float z) { - if (params.orthogonal) { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); +// packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! +float pack_edges(vec4 p_edgesLRTB) { + p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05); + return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0)); +} + +vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) { + if (params.is_orthogonal) { + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth); } else { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth); } } -vec3 getPosition(ivec2 ssP) { - vec3 P; -#ifdef USE_HALF_SIZE - P.z = texelFetch(source_depth_mipmaps, ssP, 0).r; - P.z = -P.z; -#else - P.z = texelFetch(source_depth, ssP, 0).r; +// calculate effect radius and fit our screen sampling pattern inside it +void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) { + r_radius = params.radius; - P.z = P.z * 2.0 - 1.0; - if (params.orthogonal) { - P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near)); - } - P.z = -P.z; -#endif - // Offset to pixel center - P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z); - return P; + // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts + const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2; + + r_radius *= too_close_limit; + + // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence + r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x; + + // used to calculate falloff (both for AO samples and per-sample weights) + r_fallof_sq = -1.0 / (r_radius * r_radius); } -/** Returns a unit vector and a screen-space radius for the tap on a unit disk (the caller should scale by the actual disk radius) */ -vec2 tapLocation(int sampleNumber, float spinAngle, out float ssR) { - // Radius relative to ssR - float alpha = (float(sampleNumber) + 0.5) * (1.0 / float(NUM_SAMPLES)); - float angle = alpha * (float(NUM_SPIRAL_TURNS) * 6.28) + spinAngle; +vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float p_bottom_z) { + // slope-sensitive depth-based edge detection + vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z; + vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz; + edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted)); + return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0); +} - ssR = alpha; - return vec2(cos(angle), sin(angle)); +vec3 decode_normal(vec3 p_encoded_normal) { + vec3 normal = p_encoded_normal * 2.0 - 1.0; + return normal; } -/** Read the camera-space position of the point at screen-space pixel ssP + unitOffset * ssR. Assumes length(unitOffset) == 1 */ -vec3 getOffsetPosition(ivec2 ssP, float ssR) { - // Derivation: - // mipLevel = floor(log(ssR / MAX_OFFSET)); +vec3 load_normal(ivec2 p_pos) { + vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz; + encoded_normal.z = 1.0 - encoded_normal.z; + return decode_normal(encoded_normal); +} - int mipLevel = clamp(int(floor(log2(ssR))) - LOG_MAX_OFFSET, 0, MAX_MIP_LEVEL); +vec3 load_normal(ivec2 p_pos, ivec2 p_offset) { + vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz; + encoded_normal.z = 1.0 - encoded_normal.z; + return decode_normal(encoded_normal); +} - vec3 P; +// all vectors in viewspace +float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) { + float length_sq = dot(p_hit_delta, p_hit_delta); + float NdotD = dot(p_pixel_normal, p_hit_delta) / sqrt(length_sq); - // We need to divide by 2^mipLevel to read the appropriately scaled coordinate from a MIP-map. - // Manually clamp to the texture size because texelFetch bypasses the texture unit - ivec2 mipP = clamp(ssP >> mipLevel, ivec2(0), (params.screen_size >> mipLevel) - ivec2(1)); + float falloff_mult = max(0.0, length_sq * p_fallof_sq + 1.0); -#ifdef USE_HALF_SIZE - P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel).r; - P.z = -P.z; -#else - if (mipLevel < 1) { - //read from depth buffer - P.z = texelFetch(source_depth, mipP, 0).r; - P.z = P.z * 2.0 - 1.0; - if (params.orthogonal) { - P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near)); - } - P.z = -P.z; + return max(0, NdotD - params.horizon_angle_threshold) * falloff_mult; +} - } else { - //read from mipmaps - P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel - 1).r; - P.z = -P.z; +void SSAO_tap_inner(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) { + // get depth at sample + float viewspace_sample_z = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x; + + // convert to viewspace + vec3 hit_pos = NDC_to_view_space(p_sampling_uv.xy, viewspace_sample_z).xyz; + vec3 hit_delta = hit_pos - p_pix_center_pos; + + float obscurance = calculate_pixel_obscurance(p_pixel_normal, hit_delta, p_fallof_sq); + float weight = 1.0; + + if (p_quality_level >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) { + float reduct = max(0, -hit_delta.z); + reduct = clamp(reduct * params.neg_inv_radius + 2.0, 0.0, 1.0); + weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT); } -#endif + weight *= p_weight_mod; + r_obscurance_sum += obscurance * weight; + r_weight_sum += weight; +} + +void SSAOTap(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const int p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) { + vec2 sample_offset; + float sample_pow_2_len; + + // patterns + { + vec4 new_sample = sample_pattern[p_tap_index]; + sample_offset = new_sample.xy * p_rot_scale; + sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) ); + p_weight_mod *= new_sample.z; + } + + // snap to pixel center (more correct obscurance math, avoids artifacts) + sample_offset = round(sample_offset); + + // calculate MIP based on the sample distance from the centre, similar to as described + // in http://graphics.cs.williams.edu/papers/SAOHPG12/. + float mip_level = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (sample_pow_2_len + p_mip_offset); + + vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos; + + SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); + + // for the second tap, just use the mirrored offset + vec2 sample_offset_mirrored_uv = -sample_offset; - // Offset to pixel center - P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z); + // tilt the second set of samples so that the disk is effectively rotated by the normal + // effective at removing one set of artifacts, but too expensive for lower quality settings + if (p_quality_level >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) { + float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy); + sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy; + sample_offset_mirrored_uv = round(sample_offset_mirrored_uv); + } + + // snap to pixel center (more correct obscurance math, avoids artifacts) + vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos; - return P; + SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); } -/** Compute the occlusion due to sample with index \a i about the pixel at \a ssC that corresponds - to camera-space point \a C with unit normal \a n_C, using maximum screen-space sampling radius \a ssDiskRadius +void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) { + vec2 pos_rounded = trunc(p_pos); + uvec2 upos = uvec2(pos_rounded); + + const int number_of_taps = (p_adaptive_base) ? (SSAO_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]); + float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z; + + vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass)); + vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass)); + + // get this pixel's viewspace depth + pix_z = valuesUL.y; + + // get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0) + pix_left_z = valuesUL.x; + pix_top_z = valuesUL.z; + pix_right_z = valuesBR.z; + pix_bottom_z = valuesBR.x; - Note that units of H() in the HPG12 paper are meters, not - unitless. The whole falloff/sampling function is therefore - unitless. In this implementation, we factor out (9 / radius). + vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025; + vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z); - Four versions of the falloff function are implemented below -*/ -float sampleAO(in ivec2 ssC, in vec3 C, in vec3 n_C, in float ssDiskRadius, in float p_radius, in int tapIndex, in float randomPatternRotationAngle) { - // Offset on the unit disk, spun for this pixel - float ssR; - vec2 unitOffset = tapLocation(tapIndex, randomPatternRotationAngle, ssR); - ssR *= ssDiskRadius; + // Load this pixel's viewspace normal + uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy; + vec3 pixel_normal = load_normal(ivec2(full_res_coord)); - ivec2 ssP = ivec2(ssR * unitOffset) + ssC; + const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy; - if (any(lessThan(ssP, ivec2(0))) || any(greaterThanEqual(ssP, params.screen_size))) { - return 0.0; + float pixel_lookup_radius; + float fallof_sq; + + // calculate effect radius and fit our screen sampling pattern inside it + float viewspace_radius; + calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq); + + // calculate samples rotation/scaling + mat2 rot_scale_matrix; + uint pseudo_random_index; + + { + vec4 rotation_scale; + // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead + if (!p_adaptive_base && (p_quality_level >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) { + float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - normalized_screen_pos.y)); + near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0); + pixel_lookup_radius *= near_screen_border; + } + + // load & update pseudo-random rotation matrix + pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5; + rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index]; + rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius); } - // The occluding point in camera space - vec3 Q = getOffsetPosition(ssP, ssR); + // the main obscurance & sample weight storage + float obscurance_sum = 0.0; + float weight_sum = 0.0; - vec3 v = Q - C; + // edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge) + vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0); - float vv = dot(v, v); - float vn = dot(v, n_C); + // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats + pix_center_pos *= 0.9992; - const float epsilon = 0.01; - float radius2 = p_radius * p_radius; + if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z); + } - // A: From the HPG12 paper - // Note large epsilon to avoid overdarkening within cracks - //return float(vv < radius2) * max((vn - bias) / (epsilon + vv), 0.0) * radius2 * 0.6; + // adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection) + if (!p_adaptive_base && (p_quality_level >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET)) { + // disable in case of quality level 4 (reference) + if (p_quality_level != 4) { + //approximate neighbouring pixels positions (actually just deltas or "positions - pix_center_pos" ) + vec3 normalized_viewspace_dir = vec3(pix_center_pos.xy / pix_center_pos.zz, 1.0); + vec3 pixel_left_delta = vec3(-pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_left_z - pix_center_pos.z); + vec3 pixel_right_delta = vec3(+pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_right_z - pix_center_pos.z); + vec3 pixel_top_delta = vec3(0.0, -pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_top_z - pix_center_pos.z); + vec3 pixel_bottom_delta = vec3(0.0, +pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_bottom_z - pix_center_pos.z); + + const float range_reduction = 4.0f; // this is to avoid various artifacts + const float modified_fallof_sq = range_reduction * fallof_sq; + + vec4 additional_obscurance; + additional_obscurance.x = calculate_pixel_obscurance(pixel_normal, pixel_left_delta, modified_fallof_sq); + additional_obscurance.y = calculate_pixel_obscurance(pixel_normal, pixel_right_delta, modified_fallof_sq); + additional_obscurance.z = calculate_pixel_obscurance(pixel_normal, pixel_top_delta, modified_fallof_sq); + additional_obscurance.w = calculate_pixel_obscurance(pixel_normal, pixel_bottom_delta, modified_fallof_sq); + + obscurance_sum += params.detail_intensity * dot(additional_obscurance, edgesLRTB); + } + } - // B: Smoother transition to zero (lowers contrast, smoothing out corners). [Recommended] - float f = max(radius2 - vv, 0.0); - return f * f * f * max((vn - params.bias) / (epsilon + vv), 0.0); + // Sharp normals also create edges - but this adds to the cost as well + if (!p_adaptive_base && (p_quality_level >= SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + vec3 neighbour_normal_left = load_normal(ivec2(full_res_coord), ivec2(-2, 0)); + vec3 neighbour_normal_right = load_normal(ivec2(full_res_coord), ivec2(2, 0)); + vec3 neighbour_normal_top = load_normal(ivec2(full_res_coord), ivec2(0, -2)); + vec3 neighbour_normal_bottom = load_normal(ivec2(full_res_coord), ivec2(0, 2)); - // C: Medium contrast (which looks better at high radii), no division. Note that the - // contribution still falls off with radius^2, but we've adjusted the rate in a way that is - // more computationally efficient and happens to be aesthetically pleasing. - // return 4.0 * max(1.0 - vv * invRadius2, 0.0) * max(vn - bias, 0.0); + const float dot_threshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD; - // D: Low contrast, no division operation - // return 2.0 * float(vv < radius * radius) * max(vn - bias, 0.0); -} + vec4 normal_edgesLRTB; + normal_edgesLRTB.x = clamp((dot(pixel_normal, neighbour_normal_left) + dot_threshold), 0.0, 1.0); + normal_edgesLRTB.y = clamp((dot(pixel_normal, neighbour_normal_right) + dot_threshold), 0.0, 1.0); + normal_edgesLRTB.z = clamp((dot(pixel_normal, neighbour_normal_top) + dot_threshold), 0.0, 1.0); + normal_edgesLRTB.w = clamp((dot(pixel_normal, neighbour_normal_bottom) + dot_threshold), 0.0, 1.0); -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; + edgesLRTB *= normal_edgesLRTB; } - // World space point being shaded - vec3 C = getPosition(ssC); + const float global_mip_offset = SSAO_DEPTH_MIPS_GLOBAL_OFFSET; + float mip_offset = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixel_lookup_radius) + global_mip_offset); -#ifdef USE_HALF_SIZE - vec3 n_C = texelFetch(source_normal, ssC << 1, 0).xyz * 2.0 - 1.0; -#else - vec3 n_C = texelFetch(source_normal, ssC, 0).xyz * 2.0 - 1.0; + // Used to tilt the second set of samples so that the disk is effectively rotated by the normal + // effective at removing one set of artifacts, but too expensive for lower quality settings + vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y); + float norm_xy_length = length(norm_xy); + norm_xy /= vec2(norm_xy_length, -norm_xy_length); + norm_xy_length *= SSAO_TILT_SAMPLES_AMOUNT; + + // standard, non-adaptive approach + if ((p_quality_level != 3) || p_adaptive_base) { + for (int i = 0; i < number_of_taps; i++) { + SSAOTap(p_quality_level, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length); + } + } +#ifdef ADAPTIVE + else { + // add new ones if needed + vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy; + float importance = textureLod(source_importance, full_res_uv, 0.0).x; + + // this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixel regardless of importance + obscurance_sum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / float(SSAO_MAX_TAPS)) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / float(SSAO_MAX_TAPS)); + + // load existing base values + vec2 base_values = imageLoad(source_ssao, ivec3(upos, params.pass)).xy; + weight_sum += base_values.y * float(SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0); + obscurance_sum += (base_values.x) * weight_sum; + + // increase importance around edges + float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0)); + + float avg_total_importance = float(counter.sum) * params.load_counter_avg_div; + + float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0); + importance *= importance_limiter; + + float additional_sample_count = SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance; + + const float blend_range = 3.0; + const float blend_range_inv = 1.0 / blend_range; + + additional_sample_count += 0.5; + uint additional_samples = uint(additional_sample_count); + uint additional_samples_to = min(SSAO_MAX_TAPS, additional_samples + SSAO_ADAPTIVE_TAP_BASE_COUNT); + + for (uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) { + additional_sample_count -= 1.0f; + float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0); + SSAOTap(p_quality_level, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length); + } + } #endif - n_C = normalize(n_C); - n_C.y = -n_C.y; //because this code reads flipped - // Hash function used in the HPG12 AlchemyAO paper - float randomPatternRotationAngle = mod(float((3 * ssC.x ^ ssC.y + ssC.x * ssC.y) * 10), TWO_PI); + // early out for adaptive base - just output weight (used for the next pass) + if (p_adaptive_base) { + float obscurance = obscurance_sum / weight_sum; + + r_shadow_term = obscurance; + r_edges = vec4(0.0); + r_weight = weight_sum; + return; + } + + // calculate weighted average + float obscurance = obscurance_sum / weight_sum; - // Reconstruct normals from positions. These will lead to 1-pixel black lines - // at depth discontinuities, however the blur will wipe those out so they are not visible - // in the final image. + // calculate fadeout (1 close, gradient, 0 far) + float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0); - // Choose the screen-space sample radius - // proportional to the projected area of the sphere + // Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one) + if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + // when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts + float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0); - float ssDiskRadius = -params.proj_scale * params.radius; - if (!params.orthogonal) { - ssDiskRadius = -params.proj_scale * params.radius / C.z; + fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0); } - float sum = 0.0; - for (int i = 0; i < NUM_SAMPLES; ++i) { - sum += sampleAO(ssC, C, n_C, ssDiskRadius, params.radius, i, randomPatternRotationAngle); + + // strength + obscurance = params.intensity * obscurance; + + // clamp + obscurance = min(obscurance, params.shadow_clamp); + + // fadeout + obscurance *= fade_out; + + // conceptually switch to occlusion with the meaning being visibility (grows with visibility, occlusion == 1 implies full visibility), + // to be in line with what is more commonly used. + float occlusion = 1.0 - obscurance; + + // modify the gradient + // note: this cannot be moved to a later pass because of loss of precision after storing in the render target + occlusion = pow(clamp(occlusion, 0.0, 1.0), params.shadow_power); + + // outputs! + r_shadow_term = occlusion; // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit) + r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc. + r_weight = weight_sum; +} + +void main() { + float out_shadow_term; + float out_weight; + vec4 out_edges; + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; } - float A = max(0.0, 1.0 - sum * params.intensity_div_r6 * (5.0 / float(NUM_SAMPLES))); + vec2 uv = vec2(gl_GlobalInvocationID) + vec2(0.5); +#ifdef SSAO_BASE + generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, true); + + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, out_weight / (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0), 0.0, 0.0)); +#else + generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, false); // pass in quality levels + if (params.quality == 0) { + out_edges = vec4(1.0); + } - imageStore(dest_image, ssC, vec4(A)); + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, pack_edges(out_edges), 0.0, 0.0)); +#endif } diff --git a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl index 3e63e3cb59..510a777048 100644 --- a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl +++ b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl @@ -1,3 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[compute] #version 450 @@ -7,147 +26,129 @@ VERSION_DEFINES layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(set = 0, binding = 0) uniform sampler2D source_ssao; -layout(set = 1, binding = 0) uniform sampler2D source_depth; -#ifdef MODE_UPSCALE -layout(set = 2, binding = 0) uniform sampler2D source_depth_mipmaps; -#endif -layout(r8, set = 3, binding = 0) uniform restrict writeonly image2D dest_image; - -////////////////////////////////////////////////////////////////////////////////////////////// -// Tunable Parameters: +layout(rg8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; layout(push_constant, binding = 1, std430) uniform Params { - float edge_sharpness; /** Increase to make depth edges crisper. Decrease to reduce flicker. */ - int filter_scale; - float z_far; - float z_near; - bool orthogonal; - uint pad0; - uint pad1; - uint pad2; - ivec2 axis; /** (1, 0) or (0, 1) */ - ivec2 screen_size; + float edge_sharpness; + float pad; + vec2 half_screen_pixel_size; } params; -/** Filter radius in pixels. This will be multiplied by SCALE. */ -#define R (4) +vec4 unpack_edges(float p_packed_val) { + uint packed_val = uint(p_packed_val * 255.5); + vec4 edgesLRTB; + edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; + edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; + edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; + edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; + + return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0); +} + +void add_sample(float p_ssao_value, float p_edge_value, inout float r_sum, inout float r_sum_weight) { + float weight = p_edge_value; + + r_sum += (weight * p_ssao_value); + r_sum_weight += weight; +} + +#ifdef MODE_WIDE +vec2 sample_blurred_wide(vec2 p_coord) { + vec2 vC = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 0)).xy; + vec2 vL = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(-2, 0)).xy; + vec2 vT = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, -2)).xy; + vec2 vR = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(2, 0)).xy; + vec2 vB = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 2)).xy; + + float packed_edges = vC.y; + vec4 edgesLRTB = unpack_edges(packed_edges); + edgesLRTB.x *= unpack_edges(vL.y).y; + edgesLRTB.z *= unpack_edges(vT.y).w; + edgesLRTB.y *= unpack_edges(vR.y).x; + edgesLRTB.w *= unpack_edges(vB.y).z; + + float ssao_value = vC.x; + float ssao_valueL = vL.x; + float ssao_valueT = vT.x; + float ssao_valueR = vR.x; + float ssao_valueB = vB.x; + + float sum_weight = 0.8f; + float sum = ssao_value * sum_weight; + + add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight); + add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight); + add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight); + add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight); + + float ssao_avg = sum / sum_weight; + + ssao_value = ssao_avg; + + return vec2(ssao_value, packed_edges); +} +#endif + +#ifdef MODE_SMART +vec2 sample_blurred(vec3 p_pos, vec2 p_coord) { + float packed_edges = texelFetch(source_ssao, ivec2(p_pos.xy), 0).y; + vec4 edgesLRTB = unpack_edges(packed_edges); + + vec4 valuesUL = textureGather(source_ssao, vec2(p_coord - params.half_screen_pixel_size * 0.5)); + vec4 valuesBR = textureGather(source_ssao, vec2(p_coord + params.half_screen_pixel_size * 0.5)); + + float ssao_value = valuesUL.y; + float ssao_valueL = valuesUL.x; + float ssao_valueT = valuesUL.z; + float ssao_valueR = valuesBR.z; + float ssao_valueB = valuesBR.x; + + float sum_weight = 0.5; + float sum = ssao_value * sum_weight; + + add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight); + add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight); + + add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight); + add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight); -////////////////////////////////////////////////////////////////////////////////////////////// + float ssao_avg = sum / sum_weight; -// Gaussian coefficients -const float gaussian[R + 1] = - //float[](0.356642, 0.239400, 0.072410, 0.009869); - //float[](0.398943, 0.241971, 0.053991, 0.004432, 0.000134); // stddev = 1.0 - float[](0.153170, 0.144893, 0.122649, 0.092902, 0.062970); // stddev = 2.0 -//float[](0.111220, 0.107798, 0.098151, 0.083953, 0.067458, 0.050920, 0.036108); // stddev = 3.0 + ssao_value = ssao_avg; + + return vec2(ssao_value, packed_edges); +} +#endif void main() { // Pixel being shaded ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - -#ifdef MODE_UPSCALE - - //closest one should be the same pixel, but check nearby just in case - float depth = texelFetch(source_depth, ssC, 0).r; - - depth = depth * 2.0 - 1.0; - if (params.orthogonal) { - depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); - } - - vec2 pixel_size = 1.0 / vec2(params.screen_size); - vec2 closest_uv = vec2(ssC) * pixel_size + pixel_size * 0.5; - vec2 from_uv = closest_uv; - vec2 ps2 = pixel_size; // * 2.0; - - float closest_depth = abs(textureLod(source_depth_mipmaps, closest_uv, 0.0).r - depth); - - vec2 offsets[4] = vec2[](vec2(ps2.x, 0), vec2(-ps2.x, 0), vec2(0, ps2.y), vec2(0, -ps2.y)); - for (int i = 0; i < 4; i++) { - vec2 neighbour = from_uv + offsets[i]; - float neighbour_depth = abs(textureLod(source_depth_mipmaps, neighbour, 0.0).r - depth); - if (neighbour_depth < closest_depth) { - closest_uv = neighbour; - closest_depth = neighbour_depth; - } - } - - float visibility = textureLod(source_ssao, closest_uv, 0.0).r; - imageStore(dest_image, ssC, vec4(visibility)); -#else - float depth = texelFetch(source_depth, ssC, 0).r; +#ifdef MODE_NON_SMART -#ifdef MODE_FULL_SIZE - depth = depth * 2.0 - 1.0; + vec2 halfPixel = params.half_screen_pixel_size * 0.5f; - if (params.orthogonal) { - depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); - } + vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size; -#endif - float depth_divide = 1.0 / params.z_far; - - //depth *= depth_divide; - - /* - if (depth > params.z_far * 0.999) { - discard; //skybox - } - */ - - float sum = texelFetch(source_ssao, ssC, 0).r; - - // Base weight for depth falloff. Increase this for more blurriness, - // decrease it for better edge discrimination - float BASE = gaussian[0]; - float totalWeight = BASE; - sum *= totalWeight; - - ivec2 clamp_limit = params.screen_size - ivec2(1); - - for (int r = -R; r <= R; ++r) { - // We already handled the zero case above. This loop should be unrolled and the static branch optimized out, - // so the IF statement has no runtime cost - if (r != 0) { - ivec2 ppos = ssC + params.axis * (r * params.filter_scale); - float value = texelFetch(source_ssao, clamp(ppos, ivec2(0), clamp_limit), 0).r; - ivec2 rpos = clamp(ppos, ivec2(0), clamp_limit); - - float temp_depth = texelFetch(source_depth, rpos, 0).r; -#ifdef MODE_FULL_SIZE - temp_depth = temp_depth * 2.0 - 1.0; - if (params.orthogonal) { - temp_depth = ((temp_depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - temp_depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - temp_depth * (params.z_far - params.z_near)); - } - //temp_depth *= depth_divide; -#endif - // spatial domain: offset gaussian tap - float weight = 0.3 + gaussian[abs(r)]; - //weight *= max(0.0, dot(temp_normal, normal)); + vec2 centre = textureLod(source_ssao, vec2(uv), 0.0).xy; - // range domain (the "bilateral" weight). As depth difference increases, decrease weight. - weight *= max(0.0, 1.0 - params.edge_sharpness * abs(temp_depth - depth)); + vec4 vals; + vals.x = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x * 3, -halfPixel.y)), 0.0).x; + vals.y = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x, -halfPixel.y * 3)), 0.0).x; + vals.z = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x, +halfPixel.y * 3)), 0.0).x; + vals.w = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x * 3, +halfPixel.y)), 0.0).x; - sum += value * weight; - totalWeight += weight; - } - } + vec2 sampled = vec2(dot(vals, vec4(0.2)) + centre.x * 0.2, centre.y); - const float epsilon = 0.0001; - float visibility = sum / (totalWeight + epsilon); +#else +#ifdef MODE_SMART + vec2 sampled = sample_blurred(vec3(gl_GlobalInvocationID), (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); +#else // MODE_WIDE + vec2 sampled = sample_blurred_wide((vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); +#endif - imageStore(dest_image, ssC, vec4(visibility)); #endif + imageStore(dest_image, ivec2(ssC), vec4(sampled, 0.0, 0.0)); } diff --git a/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl new file mode 100644 index 0000000000..cb2d31f70d --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl @@ -0,0 +1,206 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 1, std430) uniform Params { + vec2 pixel_size; + float z_far; + float z_near; + bool orthogonal; + float radius_sq; + uvec2 pad; +} +params; + +layout(set = 0, binding = 0) uniform sampler2D source_depth; + +layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_image0; //rename +#ifdef GENERATE_MIPS +layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray dest_image1; +layout(r16f, set = 2, binding = 1) uniform restrict writeonly image2DArray dest_image2; +layout(r16f, set = 2, binding = 2) uniform restrict writeonly image2DArray dest_image3; +#endif + +vec4 screen_space_to_view_space_depth(vec4 p_depth) { + if (params.orthogonal) { + vec4 depth = p_depth * 2.0 - 1.0; + return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } + + float depth_linearize_mul = params.z_near; + float depth_linearize_add = params.z_far; + + // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" + + // Set your depth_linearize_mul and depth_linearize_add to: + // depth_linearize_mul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); + // depth_linearize_add = cameraClipFar / ( cameraClipFar - cameraClipNear ); + + return depth_linearize_mul / (depth_linearize_add - p_depth); +} + +float screen_space_to_view_space_depth(float p_depth) { + if (params.orthogonal) { + float depth = p_depth * 2.0 - 1.0; + return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / (2.0 * params.z_far); + } + + float depth_linearize_mul = params.z_near; + float depth_linearize_add = params.z_far; + + return depth_linearize_mul / (depth_linearize_add - p_depth); +} + +#ifdef GENERATE_MIPS + +shared float depth_buffer[4][8][8]; + +float mip_smart_average(vec4 p_depths) { + float closest = min(min(p_depths.x, p_depths.y), min(p_depths.z, p_depths.w)); + float fallof_sq = -1.0f / params.radius_sq; + vec4 dists = p_depths - closest.xxxx; + vec4 weights = clamp(dists * dists * fallof_sq + 1.0, 0.0, 1.0); + return dot(weights, p_depths) / dot(weights, vec4(1.0, 1.0, 1.0, 1.0)); +} + +void prepare_depths_and_mips(vec4 p_samples, uvec2 p_output_coord, uvec2 p_gtid) { + p_samples = screen_space_to_view_space_depth(p_samples); + + depth_buffer[0][p_gtid.x][p_gtid.y] = p_samples.w; + depth_buffer[1][p_gtid.x][p_gtid.y] = p_samples.z; + depth_buffer[2][p_gtid.x][p_gtid.y] = p_samples.x; + depth_buffer[3][p_gtid.x][p_gtid.y] = p_samples.y; + + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 0), vec4(p_samples.w)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 1), vec4(p_samples.z)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 2), vec4(p_samples.x)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 3), vec4(p_samples.y)); + + uint depth_array_index = 2 * (p_gtid.y % 2) + (p_gtid.x % 2); + uvec2 depth_array_offset = ivec2(p_gtid.x % 2, p_gtid.y % 2); + ivec2 buffer_coord = ivec2(p_gtid) - ivec2(depth_array_offset); + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + // if (still_alive) <-- all threads alive here + { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 1]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 1]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image1, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; + } + + bool still_alive = p_gtid.x % 4 == depth_array_offset.x && p_gtid.y % 4 == depth_array_offset.y; + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 2]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 2]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image2, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; + } + + still_alive = p_gtid.x % 8 == depth_array_offset.x && depth_array_offset.y % 8 == depth_array_offset.y; + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 4]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 4]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image3, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + } +} +#else +#ifndef USE_HALF_BUFFERS +void prepare_depths(vec4 p_samples, uvec2 p_tid) { + p_samples = screen_space_to_view_space_depth(p_samples); + + imageStore(dest_image0, ivec3(p_tid, 0), vec4(p_samples.w)); + imageStore(dest_image0, ivec3(p_tid, 1), vec4(p_samples.z)); + imageStore(dest_image0, ivec3(p_tid, 2), vec4(p_samples.x)); + imageStore(dest_image0, ivec3(p_tid, 3), vec4(p_samples.y)); +} +#endif +#endif + +void main() { +#ifdef USE_HALF_BUFFERS +#ifdef USE_HALF_SIZE + float sample_00 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 0, 4 * gl_GlobalInvocationID.y + 0), 0).x; + float sample_11 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 2, 4 * gl_GlobalInvocationID.y + 2), 0).x; +#else + float sample_00 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 0, 2 * gl_GlobalInvocationID.y + 0), 0).x; + float sample_11 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 1, 2 * gl_GlobalInvocationID.y + 1), 0).x; +#endif + sample_00 = screen_space_to_view_space_depth(sample_00); + sample_11 = screen_space_to_view_space_depth(sample_11); + + imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 0), vec4(sample_00)); + imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 3), vec4(sample_11)); +#else //!USE_HALF_BUFFERS +#ifdef USE_HALF_SIZE + ivec2 depth_buffer_coord = 4 * ivec2(gl_GlobalInvocationID.xy); + ivec2 output_coord = ivec2(gl_GlobalInvocationID); + + vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; + vec4 samples; + samples.x = textureLodOffset(source_depth, uv, 0, ivec2(0, 2)).x; + samples.y = textureLodOffset(source_depth, uv, 0, ivec2(2, 2)).x; + samples.z = textureLodOffset(source_depth, uv, 0, ivec2(2, 0)).x; + samples.w = textureLodOffset(source_depth, uv, 0, ivec2(0, 0)).x; +#else + ivec2 depth_buffer_coord = 2 * ivec2(gl_GlobalInvocationID.xy); + ivec2 output_coord = ivec2(gl_GlobalInvocationID); + + vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; + vec4 samples = textureGather(source_depth, uv); +#endif +#ifdef GENERATE_MIPS + prepare_depths_and_mips(samples, output_coord, gl_LocalInvocationID.xy); +#else + prepare_depths(samples, gl_GlobalInvocationID.xy); +#endif +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl new file mode 100644 index 0000000000..6aa7624261 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl @@ -0,0 +1,126 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#ifdef GENERATE_MAP +layout(set = 0, binding = 0) uniform sampler2DArray source_ssao; +#else +layout(set = 0, binding = 0) uniform sampler2D source_importance; +#endif +layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; + +#ifdef PROCESS_MAPB +layout(set = 2, binding = 0, std430) buffer Counter { + uint sum; +} +counter; +#endif + +layout(push_constant, binding = 1, std430) uniform Params { + vec2 half_screen_pixel_size; + float intensity; + float power; +} +params; + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + +#ifdef GENERATE_MAP + // importance map stuff + uvec2 base_position = ssC * 2; + + vec2 base_uv = (vec2(base_position) + vec2(0.5f, 0.5f)) * params.half_screen_pixel_size; + + float avg = 0.0; + float minV = 1.0; + float maxV = 0.0; + for (int i = 0; i < 4; i++) { + vec4 vals = textureGather(source_ssao, vec3(base_uv, i)); + + // apply the same modifications that would have been applied in the main shader + vals = params.intensity * vals; + + vals = 1 - vals; + + vals = pow(clamp(vals, 0.0, 1.0), vec4(params.power)); + + avg += dot(vec4(vals.x, vals.y, vals.z, vals.w), vec4(1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0)); + + maxV = max(maxV, max(max(vals.x, vals.y), max(vals.z, vals.w))); + minV = min(minV, min(min(vals.x, vals.y), min(vals.z, vals.w))); + } + + float min_max_diff = maxV - minV; + + imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.8))); +#endif + +#ifdef PROCESS_MAPA + vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; + + float centre = textureLod(source_importance, uv, 0.0).x; + + vec2 half_pixel = params.half_screen_pixel_size; + + vec4 vals; + vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, -half_pixel.y), 0.0).x; + vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3), 0.0).x; + vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, +half_pixel.y), 0.0).x; + vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3), 0.0).x; + + float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); + + imageStore(dest_image, ssC, vec4(avg)); +#endif + +#ifdef PROCESS_MAPB + vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; + + float centre = textureLod(source_importance, uv, 0.0).x; + + vec2 half_pixel = params.half_screen_pixel_size; + + vec4 vals; + vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3), 0.0).x; + vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, -half_pixel.y), 0.0).x; + vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3), 0.0).x; + vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, +half_pixel.y), 0.0).x; + + float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); + + imageStore(dest_image, ssC, vec4(avg)); + + // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel + uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5); + + // save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9 + if (((ssC.x % 3) + (ssC.y % 3)) == 0) { + atomicAdd(counter.sum, sum); + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl new file mode 100644 index 0000000000..4fdf334aa5 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl @@ -0,0 +1,119 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba8, set = 0, binding = 0) uniform restrict writeonly image2D dest_image; +layout(set = 1, binding = 0) uniform sampler2DArray source_texture; + +layout(push_constant, binding = 1, std430) uniform Params { + float inv_sharpness; + uint size_modifier; + vec2 pixel_size; +} +params; + +vec4 unpack_edges(float p_packed_val) { + uint packed_val = uint(p_packed_val * 255.5); + vec4 edgesLRTB; + edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; + edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; + edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; + edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; + + return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0); +} + +void main() { + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, ivec2(1.0 / params.pixel_size)))) { //too large, do nothing + return; + } + +#ifdef MODE_SMART + float ao; + uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy); + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; + + // calculate index in the four deinterleaved source array texture + int mx = int(pix_pos.x % 2); + int my = int(pix_pos.y % 2); + int index_center = mx + my * 2; // center index + int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal + int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical + int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal + + vec2 center_val = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0).xy; + + ao = center_val.x; + + vec4 edgesLRTB = unpack_edges(center_val.y); + + // convert index shifts to sampling offsets + float fmx = float(mx); + float fmy = float(my); + + // in case of an edge, push sampling offsets away from the edge (towards pixel center) + float fmxe = (edgesLRTB.y - edgesLRTB.x); + float fmye = (edgesLRTB.w - edgesLRTB.z); + + // calculate final sampling offsets and sample using bilinear filter + vec2 uv_horizontal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size; + float ao_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0).x; + vec2 uv_vertical = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size; + float ao_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0).x; + vec2 uv_diagonal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size; + float ao_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0).x; + + // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 + vec4 blendWeights; + blendWeights.x = 1.0; + blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5; + blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5; + blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5; + + // calculate weighted average + float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0)); + ao = dot(vec4(ao, ao_horizontal, ao_vertical, ao_diagonal), blendWeights) / blendWeightsSum; + + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(ao)); +#else // !MODE_SMART + + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; +#ifdef MODE_HALF + float a = textureLod(source_texture, vec3(uv, 0), 0.0).x; + float d = textureLod(source_texture, vec3(uv, 3), 0.0).x; + float avg = (a + d) * 0.5; + +#else + float a = textureLod(source_texture, vec3(uv, 0), 0.0).x; + float b = textureLod(source_texture, vec3(uv, 1), 0.0).x; + float c = textureLod(source_texture, vec3(uv, 2), 0.0).x; + float d = textureLod(source_texture, vec3(uv, 3), 0.0).x; + float avg = (a + b + c + d) * 0.25; + +#endif + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(avg)); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_minify.glsl b/servers/rendering/renderer_rd/shaders/ssao_minify.glsl deleted file mode 100644 index 263fca386f..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssao_minify.glsl +++ /dev/null @@ -1,45 +0,0 @@ -#[compute] - -#version 450 - -VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(push_constant, binding = 1, std430) uniform Params { - vec2 pixel_size; - float z_far; - float z_near; - ivec2 source_size; - bool orthogonal; - uint pad; -} -params; - -#ifdef MINIFY_START -layout(set = 0, binding = 0) uniform sampler2D source_texture; -#else -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_image; -#endif -layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - - if (any(greaterThan(pos, params.source_size >> 1))) { //too large, do nothing - return; - } - -#ifdef MINIFY_START - float depth = texelFetch(source_texture, pos << 1, 0).r * 2.0 - 1.0; - if (params.orthogonal) { - depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); - } -#else - float depth = imageLoad(source_image, pos << 1).r; -#endif - - imageStore(dest_image, pos, vec4(depth)); -} |