diff options
author | Juan Linietsky <reduzio@gmail.com> | 2020-10-18 19:27:51 -0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-18 19:27:51 -0300 |
commit | 53d5a252bb3bab85b9518054893fea9563707a4b (patch) | |
tree | 4b2d613284d25c8a1c4d22b58d6d2b47653f2bcb /servers/rendering | |
parent | d98261ab8f57e5354d6235f1b9929ff3b4708486 (diff) |
Revert "Replace SAO implementation with MSSAO"
Diffstat (limited to 'servers/rendering')
17 files changed, 716 insertions, 1027 deletions
diff --git a/servers/rendering/rasterizer.h b/servers/rendering/rasterizer.h index f3b68b74cf..e64c517a0b 100644 --- a/servers/rendering/rasterizer.h +++ b/servers/rendering/rasterizer.h @@ -98,9 +98,9 @@ public: virtual void environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance) = 0; virtual void environment_set_ssr_roughness_quality(RS::EnvironmentSSRRoughnessQuality p_quality) = 0; - virtual void environment_set_ssao(RID p_env, bool p_enable, float p_rejection_radius, float p_intensity, int p_levels, float p_light_affect, float p_ao_channel_affect) = 0; + virtual void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness) = 0; - virtual void environment_set_ssao_settings(RS::EnvironmentSSAOQuality p_quality, bool p_full_samples, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance) = 0; + virtual void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size) = 0; virtual void environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, float p_energy, float p_normal_bias, float p_probe_bias) = 0; diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp index f8997bc441..0a3a863ee7 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp @@ -918,330 +918,154 @@ void RasterizerEffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, con RD::get_singleton()->compute_list_end(); } -void RasterizerEffectsRD::_compute_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_depth_buffer, const float p_tan_half_fov_h, int p_width, int p_height, int p_depth, float p_rejection_radius, float p_intensity, bool p_use_full_samples) { - const float sample_thickness[12] = { - Math::sqrt(1.0f - 0.2f * 0.2f), - Math::sqrt(1.0f - 0.4f * 0.4f), - Math::sqrt(1.0f - 0.6f * 0.6f), - Math::sqrt(1.0f - 0.8f * 0.8f), - Math::sqrt(1.0f - 0.2f * 0.2f - 0.2f * 0.2f), - Math::sqrt(1.0f - 0.2f * 0.2f - 0.4f * 0.4f), - Math::sqrt(1.0f - 0.2f * 0.2f - 0.6f * 0.6f), - Math::sqrt(1.0f - 0.2f * 0.2f - 0.8f * 0.8f), - Math::sqrt(1.0f - 0.4f * 0.4f - 0.4f * 0.4f), - Math::sqrt(1.0f - 0.4f * 0.4f - 0.6f * 0.6f), - Math::sqrt(1.0f - 0.4f * 0.4f - 0.8f * 0.8f), - Math::sqrt(1.0f - 0.6f * 0.6f - 0.6f * 0.6f) - }; - - // Here we compute multipliers that convert the center depth value into (the reciprocal of) - // sphere thicknesses at each sample location. This assumes a maximum sample radius of 5 - // units, but since a sphere has no thickness at its extent, we don't need to sample that far - // out. Only samples whole integer offsets with distance less than 25 are used. This means - // that there is no sample at (3, 4) because its distance is exactly 25 (and has a thickness of 0.) - - // The shaders are set up to sample a circular region within a 5-pixel radius. - const float screenspace_diameter = 10.0f; - - // SphereDiameter = CenterDepth * thickness_multiplier. This will compute the thickness of a sphere centered - // at a specific depth. The ellipsoid scale can stretch a sphere into an ellipsoid, which changes the - // characteristics of the AO. - // tan_half_fov_h: Radius of sphere in depth units if its center lies at Z = 1 - // screenspace_diameter: Diameter of sample sphere in pixel units - // screenspace_diameter / p_width: Ratio of the screen width that the sphere actually covers - // Note about the "2.0f * ": Diameter = 2 * Radius - float thickness_multiplier = 2.0f * p_tan_half_fov_h * screenspace_diameter / p_width; - - if (p_depth == 1) { - thickness_multiplier *= 2.0f; - } - - // This will transform a depth value from [0, thickness] to [0, 1]. - float inverse_range_factor = 1.0f / thickness_multiplier; - - // The thicknesses are smaller for all off-center samples of the sphere. Compute thicknesses relative - // to the center sample. - ssao.ssao_render_push_constant.inv_thickness_table[0] = inverse_range_factor / sample_thickness[0]; - ssao.ssao_render_push_constant.inv_thickness_table[1] = inverse_range_factor / sample_thickness[1]; - ssao.ssao_render_push_constant.inv_thickness_table[2] = inverse_range_factor / sample_thickness[2]; - ssao.ssao_render_push_constant.inv_thickness_table[3] = inverse_range_factor / sample_thickness[3]; - ssao.ssao_render_push_constant.inv_thickness_table[4] = inverse_range_factor / sample_thickness[4]; - ssao.ssao_render_push_constant.inv_thickness_table[5] = inverse_range_factor / sample_thickness[5]; - ssao.ssao_render_push_constant.inv_thickness_table[6] = inverse_range_factor / sample_thickness[6]; - ssao.ssao_render_push_constant.inv_thickness_table[7] = inverse_range_factor / sample_thickness[7]; - ssao.ssao_render_push_constant.inv_thickness_table[8] = inverse_range_factor / sample_thickness[8]; - ssao.ssao_render_push_constant.inv_thickness_table[9] = inverse_range_factor / sample_thickness[9]; - ssao.ssao_render_push_constant.inv_thickness_table[10] = inverse_range_factor / sample_thickness[10]; - ssao.ssao_render_push_constant.inv_thickness_table[11] = inverse_range_factor / sample_thickness[11]; - - // These are the weights that are multiplied against the samples because not all samples are - // equally important. The farther the sample is from the center location, the less they matter. - // We use the thickness of the sphere to determine the weight. The scalars in front are the number - // of samples with this weight because we sum the samples together before multiplying by the weight, - // so as an aggregate all of those samples matter more. After generating this table, the weights - // are normalized. - ssao.ssao_render_push_constant.sample_weight_table[0] = 4.0f * sample_thickness[0]; // Axial - ssao.ssao_render_push_constant.sample_weight_table[1] = 4.0f * sample_thickness[1]; // Axial - ssao.ssao_render_push_constant.sample_weight_table[2] = 4.0f * sample_thickness[2]; // Axial - ssao.ssao_render_push_constant.sample_weight_table[3] = 4.0f * sample_thickness[3]; // Axial - ssao.ssao_render_push_constant.sample_weight_table[4] = 4.0f * sample_thickness[4]; // Diagonal - ssao.ssao_render_push_constant.sample_weight_table[5] = 8.0f * sample_thickness[5]; // L-shaped - ssao.ssao_render_push_constant.sample_weight_table[6] = 8.0f * sample_thickness[6]; // L-shaped - ssao.ssao_render_push_constant.sample_weight_table[7] = 8.0f * sample_thickness[7]; // L-shaped - ssao.ssao_render_push_constant.sample_weight_table[8] = 4.0f * sample_thickness[8]; // Diagonal - ssao.ssao_render_push_constant.sample_weight_table[9] = 8.0f * sample_thickness[9]; // L-shaped - ssao.ssao_render_push_constant.sample_weight_table[10] = 8.0f * sample_thickness[10]; // L-shaped - ssao.ssao_render_push_constant.sample_weight_table[11] = 4.0f * sample_thickness[11]; // Diagonal - - // If we aren't using all of the samples, delete their weights before we normalize. - if (!p_use_full_samples) { - ssao.ssao_render_push_constant.sample_weight_table[0] = 0.0f; - ssao.ssao_render_push_constant.sample_weight_table[2] = 0.0f; - ssao.ssao_render_push_constant.sample_weight_table[5] = 0.0f; - ssao.ssao_render_push_constant.sample_weight_table[7] = 0.0f; - ssao.ssao_render_push_constant.sample_weight_table[9] = 0.0f; - } - - // Normalize the weights by dividing by the sum of all weights - float total_weight = 0.0f; - for (int i = 0; i < 12; ++i) { - total_weight += ssao.ssao_render_push_constant.sample_weight_table[i]; - } - - for (int i = 0; i < 12; ++i) { - ssao.ssao_render_push_constant.sample_weight_table[i] /= total_weight; - } - - ssao.ssao_render_push_constant.texel_size[0] = 1.0f / float(p_width); - ssao.ssao_render_push_constant.texel_size[1] = 1.0f / float(p_height); - ssao.ssao_render_push_constant.rejection_fadeoff = 1.0f / -p_rejection_radius; - ssao.ssao_render_push_constant.intensity = p_intensity; - ssao.ssao_render_push_constant.intensity = p_intensity; - - RID render_uniform_set = RID(); - bool uniform_set_needs_update = false; - - if (ssao.render_uniform_set_cache.has(p_depth_buffer)) { - render_uniform_set = ssao.render_uniform_set_cache[p_depth_buffer]; - if (!RD::get_singleton()->uniform_set_is_valid(render_uniform_set)) { - uniform_set_needs_update = true; - } - } else { - uniform_set_needs_update = true; - } - - if (uniform_set_needs_update) { - Vector<RD::Uniform> uniforms; - RD::Uniform u; - u.type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.ids.push_back(ssao.render_sampler); - u.ids.push_back(p_depth_buffer); - uniforms.push_back(u); +void RasterizerEffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, const Size2i &p_depth_buffer_size, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao1, bool p_half_size, RID p_ao2, RID p_upscale_buffer, float p_intensity, float p_radius, float p_bias, const CameraMatrix &p_projection, RS::EnvironmentSSAOQuality p_quality, RS::EnvironmentSSAOBlur p_blur, float p_edge_sharpness) { + //minify first + ssao.minify_push_constant.orthogonal = p_projection.is_orthogonal(); + ssao.minify_push_constant.z_near = p_projection.get_z_near(); + ssao.minify_push_constant.z_far = p_projection.get_z_far(); + ssao.minify_push_constant.pixel_size[0] = 1.0 / p_depth_buffer_size.x; + ssao.minify_push_constant.pixel_size[1] = 1.0 / p_depth_buffer_size.y; + ssao.minify_push_constant.source_size[0] = p_depth_buffer_size.x; + ssao.minify_push_constant.source_size[1] = p_depth_buffer_size.y; - render_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.ssao_render_shader.version_get_shader(ssao.ssao_render_shader_version, 0), 0); - texture_to_compute_uniform_set_cache[p_depth_buffer] = render_uniform_set; - } - - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, render_uniform_set, 0); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_destination), 1); + /* FIRST PASS */ + // Minify the depth buffer. - int x_groups = (p_width + 7) / 8; - int y_groups = (p_height + 7) / 8; - int z_groups = p_depth; + for (int i = 0; i < depth_mipmaps.size(); i++) { + if (i == 0) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_MINIFY_FIRST]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); + } else { + if (i == 1) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_MINIFY_MIPMAP]); + } - if (z_groups == 1) { - x_groups = (p_width + 15) / 16; - y_groups = (p_height + 15) / 16; - } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[i - 1]), 0); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[i]), 1); - RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.ssao_render_push_constant, sizeof(SSAORenderPushConstant)); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.minify_push_constant, sizeof(SSAOMinifyPushConstant)); + // shrink after set + ssao.minify_push_constant.source_size[0] = MAX(1, ssao.minify_push_constant.source_size[0] >> 1); + ssao.minify_push_constant.source_size[1] = MAX(1, ssao.minify_push_constant.source_size[1] >> 1); - RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, z_groups); -} + int x_groups = (ssao.minify_push_constant.source_size[0] - 1) / 8 + 1; + int y_groups = (ssao.minify_push_constant.source_size[1] - 1) / 8 + 1; -void RasterizerEffectsRD::_upsample_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_hi_res_depth, RID p_lo_res_depth, RID p_interleaved_ao, RID p_high_quality_ao, RID p_hi_res_ao, int p_low_width, int p_low_height, int p_high_width, int p_high_height, int p_screen_width, float p_noise_tolerance, float p_blur_tolerance, float p_upscale_tolerance) { - SSAOMode pipeline = SSAO_MAX; - if (p_hi_res_ao == RID()) { - pipeline = p_high_quality_ao == RID() ? SSAO_BLUR_UPSCALE : SSAO_BLUR_UPSCALE_MIN; - } else { - pipeline = p_high_quality_ao == RID() ? SSAO_BLUR_UPSCALE_BLEND : SSAO_BLUR_UPSCALE_MIN_BLEND; + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); } - RD::get_singleton()->compute_list_bind_compute_pipeline(p_compute_list, ssao.pipelines[pipeline]); - - float blur_tolerance = 1.0f - powf(10.0f, p_blur_tolerance) * float(p_screen_width) / float(p_low_width); - blur_tolerance *= blur_tolerance; - float upsample_tolerance = powf(10.0f, p_upscale_tolerance); - float noise_filter_weight = 1.0f / (powf(10.0f, p_noise_tolerance) + upsample_tolerance); - - ssao.upsample_push_constant.inv_low_resolution[0] = 1.0 / float(p_low_width); - ssao.upsample_push_constant.inv_low_resolution[1] = 1.0 / float(p_low_height); - ssao.upsample_push_constant.inv_high_resolution[0] = 1.0 / float(p_high_width); - ssao.upsample_push_constant.inv_high_resolution[1] = 1.0 / float(p_high_height); - ssao.upsample_push_constant.noise_filter_strength = noise_filter_weight; - ssao.upsample_push_constant.step_size = float(p_screen_width) / float(p_low_width); - ssao.upsample_push_constant.blur_tolerance = blur_tolerance; - ssao.upsample_push_constant.upsample_tolerance = upsample_tolerance; - - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_lo_res_depth), 0); - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_hi_res_depth), 1); - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_interleaved_ao), 2); - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_destination), 3); + /* SECOND PASS */ + // Gather samples - if (p_high_quality_ao != RID()) { - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_high_quality_ao), 4); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[(SSAO_GATHER_LOW + p_quality) + (p_half_size ? 4 : 0)]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao1), 1); + if (!p_half_size) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 2); } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_normal_buffer), 3); - if (p_hi_res_ao != RID()) { - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_hi_res_ao), 5); + ssao.gather_push_constant.screen_size[0] = p_depth_buffer_size.x; + ssao.gather_push_constant.screen_size[1] = p_depth_buffer_size.y; + if (p_half_size) { + ssao.gather_push_constant.screen_size[0] >>= 1; + ssao.gather_push_constant.screen_size[1] >>= 1; } + ssao.gather_push_constant.z_far = p_projection.get_z_far(); + ssao.gather_push_constant.z_near = p_projection.get_z_near(); + ssao.gather_push_constant.orthogonal = p_projection.is_orthogonal(); - int x_groups = ((p_high_width + 17) / 16); - int y_groups = ((p_high_height + 17) / 16); + ssao.gather_push_constant.proj_info[0] = -2.0f / (ssao.gather_push_constant.screen_size[0] * p_projection.matrix[0][0]); + ssao.gather_push_constant.proj_info[1] = -2.0f / (ssao.gather_push_constant.screen_size[1] * p_projection.matrix[1][1]); + ssao.gather_push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; + ssao.gather_push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; + //ssao.gather_push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; + //ssao.gather_push_constant.proj_info[3] = -(1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.upsample_push_constant, sizeof(SSAOUpsamplePushConstant)); - - RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, 1); - RD::get_singleton()->compute_list_add_barrier(p_compute_list); -} + ssao.gather_push_constant.radius = p_radius; -// Implementation comes from Microsofts DirectX samples miniengine here https://github.com/microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/SSAO.cpp -void RasterizerEffectsRD::generate_ssao(RID p_depth_buffer, const Size2i &p_depth_buffer_size, const Vector<RID> &depth_mipmaps, RID p_linear_z, const Vector<RID> &p_tiled_depth_mipmaps, const Vector<RID> &p_ao_slices, const Vector<RID> &p_high_quality_ao_slices, const Vector<RID> &p_filtered_ao_slices, RID p_ao_full, const CameraMatrix &p_projection, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance, float p_rejection_radius, float p_intensity, int p_levels, RS::EnvironmentSSAOQuality p_quality, bool p_full_samples) { - ssao.downsample1_push_constant.orthogonal = p_projection.is_orthogonal(); - ssao.downsample1_push_constant.z_near = p_projection.get_z_near(); - ssao.downsample1_push_constant.z_far = p_projection.get_z_far(); - - const int buffer_width1 = (p_depth_buffer_size.x + 1) / 2; - const int buffer_width2 = (p_depth_buffer_size.x + 3) / 4; - const int buffer_width3 = (p_depth_buffer_size.x + 7) / 8; - const int buffer_width4 = (p_depth_buffer_size.x + 15) / 16; - const int buffer_width5 = (p_depth_buffer_size.x + 31) / 32; - const int buffer_width6 = (p_depth_buffer_size.x + 63) / 64; - const int buffer_height1 = (p_depth_buffer_size.y + 1) / 2; - const int buffer_height2 = (p_depth_buffer_size.y + 3) / 4; - const int buffer_height3 = (p_depth_buffer_size.y + 7) / 8; - const int buffer_height4 = (p_depth_buffer_size.y + 15) / 16; - const int buffer_height5 = (p_depth_buffer_size.y + 31) / 32; - const int buffer_height6 = (p_depth_buffer_size.y + 63) / 64; + ssao.gather_push_constant.proj_scale = float(p_projection.get_pixels_per_meter(ssao.gather_push_constant.screen_size[0])); + ssao.gather_push_constant.bias = p_bias; + ssao.gather_push_constant.intensity_div_r6 = p_intensity / pow(p_radius, 6.0f); - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + ssao.gather_push_constant.pixel_size[0] = 1.0 / p_depth_buffer_size.x; + ssao.gather_push_constant.pixel_size[1] = 1.0 / p_depth_buffer_size.y; - /* FIRST PASS */ - // Downsample the depth buffer. - { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_DOWNSAMPLE1]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_linear_z), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[0]), 2); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[0]), 3); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[1]), 4); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[1]), 5); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); - int x_groups = (buffer_width4 * 8 + 7) / 8; - int y_groups = (buffer_height4 * 8 + 7) / 8; + int x_groups = (ssao.gather_push_constant.screen_size[0] - 1) / 8 + 1; + int y_groups = (ssao.gather_push_constant.screen_size[1] - 1) / 8 + 1; - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample1_push_constant, sizeof(SSAODownsample1PushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - } - if (p_levels > 2) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_DOWNSAMPLE2]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[1]), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[2]), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[2]), 2); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[3]), 3); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[3]), 4); + /* THIRD PASS */ + // Blur horizontal + + ssao.blur_push_constant.edge_sharpness = p_edge_sharpness; + ssao.blur_push_constant.filter_scale = p_blur; + ssao.blur_push_constant.screen_size[0] = ssao.gather_push_constant.screen_size[0]; + ssao.blur_push_constant.screen_size[1] = ssao.gather_push_constant.screen_size[1]; + ssao.blur_push_constant.z_far = p_projection.get_z_far(); + ssao.blur_push_constant.z_near = p_projection.get_z_near(); + ssao.blur_push_constant.orthogonal = p_projection.is_orthogonal(); + ssao.blur_push_constant.axis[0] = 1; + ssao.blur_push_constant.axis[1] = 0; + + if (p_blur != RS::ENV_SSAO_BLUR_DISABLED) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[p_half_size ? SSAO_BLUR_PASS_HALF : SSAO_BLUR_PASS]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao1), 0); + if (p_half_size) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 1); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 1); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao2), 3); - int x_groups = (buffer_width6 * 8 + 7) / 8; - int y_groups = (buffer_height6 * 8 + 7) / 8; + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); - } - /* SECOND PASS */ - // compute AO for each level used + /* THIRD PASS */ + // Blur vertical - { - const float fov_tangent = 0.5 / p_projection.matrix[0][0]; - - if (p_levels > 3) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); - _compute_ssao(compute_list, p_filtered_ao_slices[3], p_tiled_depth_mipmaps[3], fov_tangent, buffer_width6, buffer_height6, 16, p_rejection_radius, p_intensity, p_full_samples); - if (p_quality >= RS::ENV_SSAO_QUALITY_LOW) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); - _compute_ssao(compute_list, p_high_quality_ao_slices[3], depth_mipmaps[3], fov_tangent, buffer_width4, buffer_height4, 1, p_rejection_radius, p_intensity, p_full_samples); - } - } - if (p_levels > 2) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); - _compute_ssao(compute_list, p_filtered_ao_slices[2], p_tiled_depth_mipmaps[2], fov_tangent, buffer_width5, buffer_height5, 16, p_rejection_radius, p_intensity, p_full_samples); - if (p_quality >= RS::ENV_SSAO_QUALITY_MEDIUM) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); - _compute_ssao(compute_list, p_high_quality_ao_slices[2], depth_mipmaps[2], fov_tangent, buffer_width3, buffer_height3, 1, p_rejection_radius, p_intensity, p_full_samples); - } - } - if (p_levels > 1) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); - _compute_ssao(compute_list, p_filtered_ao_slices[1], p_tiled_depth_mipmaps[1], fov_tangent, buffer_width4, buffer_height4, 16, p_rejection_radius, p_intensity, p_full_samples); - if (p_quality >= RS::ENV_SSAO_QUALITY_HIGH) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); - _compute_ssao(compute_list, p_high_quality_ao_slices[1], depth_mipmaps[1], fov_tangent, buffer_width2, buffer_height2, 1, p_rejection_radius, p_intensity, p_full_samples); - } - } - { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); - _compute_ssao(compute_list, p_filtered_ao_slices[0], p_tiled_depth_mipmaps[0], fov_tangent, buffer_width3, buffer_height3, 16, p_rejection_radius, p_intensity, p_full_samples); - if (p_quality >= RS::ENV_SSAO_QUALITY_ULTRA) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); - _compute_ssao(compute_list, p_high_quality_ao_slices[0], depth_mipmaps[0], fov_tangent, buffer_width1, buffer_height1, 1, p_rejection_radius, p_intensity, p_full_samples); - } - } - } - RD::get_singleton()->compute_list_add_barrier(compute_list); + ssao.blur_push_constant.axis[0] = 0; + ssao.blur_push_constant.axis[1] = 1; - /* THIRD PASS */ - // blend and upsample levels for final result + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao2), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao1), 3); - { - RID NextSRV = p_filtered_ao_slices[3]; + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); - if (p_levels > 3) { - _upsample_ssao(compute_list, p_ao_slices[2], depth_mipmaps[2], depth_mipmaps[3], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_LOW ? p_high_quality_ao_slices[3] : RID(), - p_filtered_ao_slices[2], buffer_width4, buffer_height4, buffer_width3, buffer_height3, p_depth_buffer_size.x, - p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + } + if (p_half_size) { //must upscale - NextSRV = p_ao_slices[2]; - } else { - NextSRV = p_filtered_ao_slices[2]; - } + /* FOURTH PASS */ + // upscale if half size + //back to full size + ssao.blur_push_constant.screen_size[0] = p_depth_buffer_size.x; + ssao.blur_push_constant.screen_size[1] = p_depth_buffer_size.y; - if (p_levels > 2) { - _upsample_ssao(compute_list, p_ao_slices[1], depth_mipmaps[1], depth_mipmaps[2], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_MEDIUM ? p_high_quality_ao_slices[2] : RID(), - p_filtered_ao_slices[1], buffer_width3, buffer_height3, buffer_width2, buffer_height2, p_depth_buffer_size.x, - p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); + RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_BLUR_UPSCALE]); - NextSRV = p_ao_slices[1]; - } else { - NextSRV = p_filtered_ao_slices[1]; - } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao1), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_upscale_buffer), 3); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 2); - if (p_levels > 1) { - _upsample_ssao(compute_list, p_ao_slices[0], depth_mipmaps[0], depth_mipmaps[1], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_HIGH ? p_high_quality_ao_slices[1] : RID(), - p_filtered_ao_slices[0], buffer_width2, buffer_height2, buffer_width1, buffer_height1, p_depth_buffer_size.x, - p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); //not used but set anyway - NextSRV = p_ao_slices[0]; - } else { - NextSRV = p_filtered_ao_slices[0]; - } + x_groups = (p_depth_buffer_size.x - 1) / 8 + 1; + y_groups = (p_depth_buffer_size.y - 1) / 8 + 1; - _upsample_ssao(compute_list, p_ao_full, p_linear_z, depth_mipmaps[0], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_ULTRA ? p_high_quality_ao_slices[0] : RID(), - RID(), buffer_width1, buffer_height1, p_depth_buffer_size.x, p_depth_buffer_size.y, p_depth_buffer_size.x, - p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); } RD::get_singleton()->compute_list_end(); @@ -1624,67 +1448,54 @@ RasterizerEffectsRD::RasterizerEffectsRD() { } { - RD::SamplerState ssao_sampler; - ssao_sampler.mag_filter = RD::SAMPLER_FILTER_LINEAR; - ssao_sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; - ssao_sampler.max_lod = 0; - ssao_sampler.border_color = RD::SAMPLER_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - ssao_sampler.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER; - ssao_sampler.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER; - ssao_sampler.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER; - - ssao.render_sampler = RD::get_singleton()->sampler_create(ssao_sampler); // Initialize ssao uint32_t pipeline = 0; { Vector<String> ssao_modes; + ssao_modes.push_back("\n#define MINIFY_START\n"); ssao_modes.push_back("\n"); - ssao.downsample1_shader.initialize(ssao_modes); + ssao.minify_shader.initialize(ssao_modes); - ssao.downsample1_shader_version = ssao.downsample1_shader.version_create(); + ssao.minify_shader_version = ssao.minify_shader.version_create(); - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.downsample1_shader.version_get_shader(ssao.downsample1_shader_version, 0)); - pipeline++; - } - { - Vector<String> ssao_modes; - ssao_modes.push_back("\n"); - - ssao.downsample2_shader.initialize(ssao_modes); - - ssao.downsample2_shader_version = ssao.downsample2_shader.version_create(); - - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.downsample2_shader.version_get_shader(ssao.downsample2_shader_version, 0)); - pipeline++; + for (int i = 0; i <= SSAO_MINIFY_MIPMAP; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.minify_shader.version_get_shader(ssao.minify_shader_version, i)); + pipeline++; + } } { Vector<String> ssao_modes; - ssao_modes.push_back("\n#define INTERLEAVE_RESULT\n"); + ssao_modes.push_back("\n#define SSAO_QUALITY_LOW\n"); ssao_modes.push_back("\n"); + ssao_modes.push_back("\n#define SSAO_QUALITY_HIGH\n"); + ssao_modes.push_back("\n#define SSAO_QUALITY_ULTRA\n"); + ssao_modes.push_back("\n#define SSAO_QUALITY_LOW\n#define USE_HALF_SIZE\n"); + ssao_modes.push_back("\n#define USE_HALF_SIZE\n"); + ssao_modes.push_back("\n#define SSAO_QUALITY_HIGH\n#define USE_HALF_SIZE\n"); + ssao_modes.push_back("\n#define SSAO_QUALITY_ULTRA\n#define USE_HALF_SIZE\n"); - ssao.ssao_render_shader.initialize(ssao_modes); + ssao.gather_shader.initialize(ssao_modes); - ssao.ssao_render_shader_version = ssao.ssao_render_shader.version_create(); + ssao.gather_shader_version = ssao.gather_shader.version_create(); - for (int i = SSAO_RENDER1; i <= SSAO_RENDER2; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.ssao_render_shader.version_get_shader(ssao.ssao_render_shader_version, i - SSAO_RENDER1)); + for (int i = SSAO_GATHER_LOW; i <= SSAO_GATHER_ULTRA_HALF; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.gather_shader.version_get_shader(ssao.gather_shader_version, i - SSAO_GATHER_LOW)); pipeline++; } } { Vector<String> ssao_modes; + ssao_modes.push_back("\n#define MODE_FULL_SIZE\n"); ssao_modes.push_back("\n"); - ssao_modes.push_back("\n#define COMBINE_LOWER_RESOLUTIONS\n"); - ssao_modes.push_back("\n#define BLEND_WITH_HIGHER_RESOLUTION\n"); - ssao_modes.push_back("\n#define COMBINE_LOWER_RESOLUTIONS\n#define BLEND_WITH_HIGHER_RESOLUTION\n"); + ssao_modes.push_back("\n#define MODE_UPSCALE\n"); - ssao.upsample_shader.initialize(ssao_modes); + ssao.blur_shader.initialize(ssao_modes); - ssao.upsample_shader_version = ssao.upsample_shader.version_create(); + ssao.blur_shader_version = ssao.blur_shader.version_create(); - for (int i = SSAO_BLUR_UPSCALE; i <= SSAO_BLUR_UPSCALE_MIN_BLEND; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.upsample_shader.version_get_shader(ssao.upsample_shader_version, i - SSAO_BLUR_UPSCALE)); + for (int i = SSAO_BLUR_PASS; i <= SSAO_BLUR_UPSCALE; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.blur_shader.version_get_shader(ssao.blur_shader_version, i - SSAO_BLUR_PASS)); pipeline++; } @@ -1926,7 +1737,6 @@ RasterizerEffectsRD::~RasterizerEffectsRD() { RD::get_singleton()->free(default_sampler); RD::get_singleton()->free(default_mipmap_sampler); - RD::get_singleton()->free(ssao.render_sampler); RD::get_singleton()->free(index_buffer); //array gets freed as dependency RD::get_singleton()->free(filter.coefficient_buffer); @@ -1942,10 +1752,9 @@ RasterizerEffectsRD::~RasterizerEffectsRD() { roughness_limiter.shader.version_free(roughness_limiter.shader_version); sort.shader.version_free(sort.shader_version); specular_merge.shader.version_free(specular_merge.shader_version); - ssao.upsample_shader.version_free(ssao.upsample_shader_version); - ssao.ssao_render_shader.version_free(ssao.ssao_render_shader_version); - ssao.downsample1_shader.version_free(ssao.downsample1_shader_version); - ssao.downsample2_shader.version_free(ssao.downsample2_shader_version); + ssao.blur_shader.version_free(ssao.blur_shader_version); + ssao.gather_shader.version_free(ssao.gather_shader_version); + ssao.minify_shader.version_free(ssao.minify_shader_version); ssr.shader.version_free(ssr.shader_version); ssr_filter.shader.version_free(ssr_filter.shader_version); ssr_scale.shader.version_free(ssr_scale.shader_version); diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h index 4331441502..8607a6ee67 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h @@ -49,10 +49,9 @@ #include "servers/rendering/rasterizer_rd/shaders/shadow_reduce.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/sort.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/specular_merge.glsl.gen.h" -#include "servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl.gen.h" -#include "servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl.gen.h" -#include "servers/rendering/rasterizer_rd/shaders/ssao_render.glsl.gen.h" -#include "servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/ssao.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/subsurface_scattering.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/tonemap.glsl.gen.h" @@ -282,60 +281,72 @@ class RasterizerEffectsRD { } bokeh; enum SSAOMode { - SSAO_DOWNSAMPLE1, - SSAO_DOWNSAMPLE2, - SSAO_RENDER1, - SSAO_RENDER2, + SSAO_MINIFY_FIRST, + SSAO_MINIFY_MIPMAP, + SSAO_GATHER_LOW, + SSAO_GATHER_MEDIUM, + SSAO_GATHER_HIGH, + SSAO_GATHER_ULTRA, + SSAO_GATHER_LOW_HALF, + SSAO_GATHER_MEDIUM_HALF, + SSAO_GATHER_HIGH_HALF, + SSAO_GATHER_ULTRA_HALF, + SSAO_BLUR_PASS, + SSAO_BLUR_PASS_HALF, SSAO_BLUR_UPSCALE, - SSAO_BLUR_UPSCALE_MIN, - SSAO_BLUR_UPSCALE_BLEND, - SSAO_BLUR_UPSCALE_MIN_BLEND, SSAO_MAX }; - struct SSAODownsample1PushConstant { + struct SSAOMinifyPushConstant { + float pixel_size[2]; float z_far; float z_near; + int32_t source_size[2]; uint32_t orthogonal; uint32_t pad; }; - struct SSAORenderPushConstant { - float inv_thickness_table[12]; - float sample_weight_table[12]; - float texel_size[2]; - float rejection_fadeoff; - float intensity; + struct SSAOGatherPushConstant { + int32_t screen_size[2]; + float z_far; + float z_near; + + uint32_t orthogonal; + float intensity_div_r6; + float radius; + float bias; + + float proj_info[4]; + float pixel_size[2]; + float proj_scale; + uint32_t pad; }; - struct SSAOUpsamplePushConstant { - float inv_low_resolution[2]; - float inv_high_resolution[2]; - float noise_filter_strength; - float step_size; - float blur_tolerance; - float upsample_tolerance; + struct SSAOBlurPushConstant { + float edge_sharpness; + int32_t filter_scale; + float z_far; + float z_near; + uint32_t orthogonal; + uint32_t pad[3]; + int32_t axis[2]; + int32_t screen_size[2]; }; struct SSAO { - SSAODownsample1PushConstant downsample1_push_constant; - SsaoDownsample1ShaderRD downsample1_shader; - RID downsample1_shader_version; - - SsaoDownsample2ShaderRD downsample2_shader; - RID downsample2_shader_version; + SSAOMinifyPushConstant minify_push_constant; + SsaoMinifyShaderRD minify_shader; + RID minify_shader_version; - SSAORenderPushConstant ssao_render_push_constant; - SsaoRenderShaderRD ssao_render_shader; - RID ssao_render_shader_version; + SSAOGatherPushConstant gather_push_constant; + SsaoShaderRD gather_shader; + RID gather_shader_version; - SSAOUpsamplePushConstant upsample_push_constant; - SsaoUpsampleShaderRD upsample_shader; - RID upsample_shader_version; + SSAOBlurPushConstant blur_push_constant; + SsaoBlurShaderRD blur_shader; + RID blur_shader_version; RID pipelines[SSAO_MAX]; - RID render_sampler; - Map<RID, RID> render_uniform_set_cache; } ssao; struct RoughnessLimiterPushConstant { @@ -645,9 +656,7 @@ public: void tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings); - _FORCE_INLINE_ void _compute_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_depth_buffer, const float p_tan_half_fov_h, int p_width, int p_height, int p_depth, float p_rejection_radius, float p_intensity, bool p_full_samples); - _FORCE_INLINE_ void _upsample_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_hi_res_depth, RID p_lo_res_depth, RID p_interleaved_ao, RID p_high_quality_ao, RID p_hi_res_ao, int p_low_width, int p_low_height, int p_high_width, int p_high_high, int p_screen_width, float p_noise_tolerance, float p_blur_tolerance, float p_upscale_tolerance); - void generate_ssao(RID p_depth_buffer, const Size2i &p_depth_buffer_size, const Vector<RID> &depth_mipmaps, RID p_linear_z, const Vector<RID> &p_tiled_depth_mipmaps, const Vector<RID> &p_ao_slices, const Vector<RID> &p_high_quality_ao_slices, const Vector<RID> &p_filtered_ao_slices, RID p_ao_full, const CameraMatrix &p_projection, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance, float p_rejection_radius, float p_intensity, int p_levels, RS::EnvironmentSSAOQuality p_quality, bool p_full_samples); + void generate_ssao(RID p_depth_buffer, RID p_normal_buffer, const Size2i &p_depth_buffer_size, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao1, bool p_half_size, RID p_ao2, RID p_upscale_buffer, float p_intensity, float p_radius, float p_bias, const CameraMatrix &p_projection, RS::EnvironmentSSAOQuality p_quality, RS::EnvironmentSSAOBlur p_blur, float p_edge_sharpness); void roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve); void cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size); diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp index 0d341581a6..9e6225a97a 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp @@ -3100,25 +3100,22 @@ RS::EnvironmentSSRRoughnessQuality RasterizerSceneRD::environment_get_ssr_roughn return ssr_roughness_quality; } -void RasterizerSceneRD::environment_set_ssao(RID p_env, bool p_enable, float p_rejection_radius, float p_intensity, int p_levels, float p_light_affect, float p_ao_channel_affect) { +void RasterizerSceneRD::environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness) { Environment *env = environment_owner.getornull(p_env); ERR_FAIL_COND(!env); env->ssao_enabled = p_enable; - - env->ssao_rejection_radius = p_rejection_radius; + env->ssao_radius = p_radius; env->ssao_intensity = p_intensity; - env->ssao_levels = p_levels; + env->ssao_bias = p_bias; env->ssao_direct_light_affect = p_light_affect; env->ssao_ao_channel_affect = p_ao_channel_affect; + env->ssao_blur = p_blur; } -void RasterizerSceneRD::environment_set_ssao_settings(RS::EnvironmentSSAOQuality p_quality, bool p_full_samples, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance) { +void RasterizerSceneRD::environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size) { ssao_quality = p_quality; - ssao_full_samples = p_full_samples; - ssao_noise_tolerance = Math::lerp(-8.0f, 0.0f, p_noise_tolerance); - ssao_blur_tolerance = Math::lerp(-8.0f, -1.0f, p_blur_tolerance); - ssao_upsample_tolerance = Math::lerp(-12.0f, -1.0f, p_upsample_tolerance); + ssao_half_size = p_half_size; } bool RasterizerSceneRD::environment_is_ssao_enabled(RID p_env) const { @@ -5038,33 +5035,21 @@ void RasterizerSceneRD::_free_render_buffer_data(RenderBuffers *rb) { rb->luminance.current = RID(); } - if (rb->ssao.ao_full.is_valid()) { - RD::get_singleton()->free(rb->ssao.ao_full); - RD::get_singleton()->free(rb->ssao.linear_depth); - rb->ssao.ao_full = RID(); - rb->ssao.linear_depth = RID(); - - for (int i = 0; i < rb->ssao.depth_slices.size(); i++) { - RD::get_singleton()->free(rb->ssao.depth_slices[i]); - } - for (int i = 0; i < rb->ssao.depth_tiled_slices.size(); i++) { - RD::get_singleton()->free(rb->ssao.depth_tiled_slices[i]); - } - for (int i = 0; i < rb->ssao.ao_slices.size(); i++) { - RD::get_singleton()->free(rb->ssao.ao_slices[i]); + if (rb->ssao.ao[0].is_valid()) { + RD::get_singleton()->free(rb->ssao.depth); + RD::get_singleton()->free(rb->ssao.ao[0]); + if (rb->ssao.ao[1].is_valid()) { + RD::get_singleton()->free(rb->ssao.ao[1]); } - for (int i = 0; i < rb->ssao.filtered_ao_slices.size(); i++) { - RD::get_singleton()->free(rb->ssao.filtered_ao_slices[i]); - } - for (int i = 0; i < rb->ssao.high_quality_ao_slices.size(); i++) { - RD::get_singleton()->free(rb->ssao.high_quality_ao_slices[i]); + if (rb->ssao.ao_full.is_valid()) { + RD::get_singleton()->free(rb->ssao.ao_full); } + rb->ssao.depth = RID(); + rb->ssao.ao[0] = RID(); + rb->ssao.ao[1] = RID(); + rb->ssao.ao_full = RID(); rb->ssao.depth_slices.clear(); - rb->ssao.depth_tiled_slices.clear(); - rb->ssao.ao_slices.clear(); - rb->ssao.filtered_ao_slices.clear(); - rb->ssao.high_quality_ao_slices.clear(); } if (rb->ssr.blur_radius[0].is_valid()) { @@ -5163,117 +5148,64 @@ void RasterizerSceneRD::_process_ssao(RID p_render_buffers, RID p_environment, R RENDER_TIMESTAMP("Process SSAO"); - int size_x = rb->width; - int size_y = rb->height; - const int buffer_widths[6] = { - (size_x + 1) / 2, - (size_x + 3) / 4, - (size_x + 7) / 8, - (size_x + 15) / 16, - (size_x + 31) / 32, - (size_x + 63) / 64 - }; - const int buffer_heights[6] = { - (size_y + 1) / 2, - (size_y + 3) / 4, - (size_y + 7) / 8, - (size_y + 15) / 16, - (size_y + 31) / 32, - (size_y + 63) / 64 - }; - - if (!rb->ssao.ao_full.is_valid()) { - //allocate SSAO buffers - - { - for (uint32_t i = 0; i < 4; i++) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; - tf.width = buffer_widths[i]; - tf.height = buffer_heights[i]; - tf.mipmaps = 1; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssao.depth_slices.push_back(slice); - } + if (rb->ssao.ao[0].is_valid() && rb->ssao.ao_full.is_valid() != ssao_half_size) { + RD::get_singleton()->free(rb->ssao.depth); + RD::get_singleton()->free(rb->ssao.ao[0]); + if (rb->ssao.ao[1].is_valid()) { + RD::get_singleton()->free(rb->ssao.ao[1]); } - - { - for (uint32_t i = 2; i < 6; i++) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16_SFLOAT; - tf.type = RD::TEXTURE_TYPE_2D_ARRAY; - tf.array_layers = 16; - tf.width = buffer_widths[i]; - tf.height = buffer_heights[i]; - tf.mipmaps = 1; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssao.depth_tiled_slices.push_back(slice); - } + if (rb->ssao.ao_full.is_valid()) { + RD::get_singleton()->free(rb->ssao.ao_full); } - { - for (uint32_t i = 0; i < 3; i++) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = buffer_widths[i]; - tf.height = buffer_heights[i]; - tf.mipmaps = 1; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssao.ao_slices.push_back(slice); - } - } + rb->ssao.depth = RID(); + rb->ssao.ao[0] = RID(); + rb->ssao.ao[1] = RID(); + rb->ssao.ao_full = RID(); + rb->ssao.depth_slices.clear(); + } - { - for (uint32_t i = 0; i < 4; i++) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = buffer_widths[i]; - tf.height = buffer_heights[i]; - tf.mipmaps = 1; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssao.high_quality_ao_slices.push_back(slice); - } - } + if (!rb->ssao.ao[0].is_valid()) { + //allocate depth slices { - for (uint32_t i = 0; i < 4; i++) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = buffer_widths[i]; - tf.height = buffer_heights[i]; - tf.mipmaps = 1; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssao.filtered_ao_slices.push_back(slice); + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + tf.width = rb->width / 2; + tf.height = rb->height / 2; + tf.mipmaps = Image::get_image_required_mipmaps(tf.width, tf.height, Image::FORMAT_RF) + 1; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->ssao.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + for (uint32_t i = 0; i < tf.mipmaps; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.depth, 0, i); + rb->ssao.depth_slices.push_back(slice); } } { RD::TextureFormat tf; tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = size_x; - tf.height = size_y; + tf.width = ssao_half_size ? rb->width / 2 : rb->width; + tf.height = ssao_half_size ? rb->height / 2 : rb->height; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ssao.ao_full = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.ao[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.ao[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); } - { + if (ssao_half_size) { + //upsample texture RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16_UNORM; - tf.width = size_x; - tf.height = size_y; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = rb->width; + tf.height = rb->height; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ssao.linear_depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.ao_full = RD::get_singleton()->texture_create(tf, RD::TextureView()); } _render_buffers_uniform_set_changed(p_render_buffers); } - storage->get_effects()->generate_ssao(rb->depth_texture, Size2i(size_x, size_y), rb->ssao.depth_slices, rb->ssao.linear_depth, rb->ssao.depth_tiled_slices, rb->ssao.ao_slices, rb->ssao.high_quality_ao_slices, rb->ssao.filtered_ao_slices, rb->ssao.ao_full, p_projection, ssao_noise_tolerance, ssao_blur_tolerance, ssao_upsample_tolerance, env->ssao_rejection_radius, env->ssao_intensity, env->ssao_levels, ssao_quality, ssao_full_samples); + storage->get_effects()->generate_ssao(rb->depth_texture, p_normal_buffer, Size2i(rb->width, rb->height), rb->ssao.depth, rb->ssao.depth_slices, rb->ssao.ao[0], rb->ssao.ao_full.is_valid(), rb->ssao.ao[1], rb->ssao.ao_full, env->ssao_intensity, env->ssao_radius, env->ssao_bias, p_projection, ssao_quality, env->ssao_blur, env->ssao_blur_edge_sharpness); } void RasterizerSceneRD::_render_buffers_post_process_and_tonemap(RID p_render_buffers, RID p_environment, RID p_camera_effects, const CameraMatrix &p_projection) { @@ -5438,9 +5370,9 @@ void RasterizerSceneRD::_render_buffers_debug_draw(RID p_render_buffers, RID p_s } } - if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SSAO && rb->ssao.ao_full.is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SSAO && rb->ssao.ao[0].is_valid()) { Size2 rtsize = storage->render_target_get_size(rb->render_target); - RID ao_buf = rb->ssao.ao_full; + RID ao_buf = rb->ssao.ao_full.is_valid() ? rb->ssao.ao_full : rb->ssao.ao[0]; effects->copy_to_fb_rect(ao_buf, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, true); } @@ -5616,7 +5548,7 @@ RID RasterizerSceneRD::render_buffers_get_ao_texture(RID p_render_buffers) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND_V(!rb, RID()); - return rb->ssao.ao_full; + return rb->ssao.ao_full.is_valid() ? rb->ssao.ao_full : rb->ssao.ao[0]; } RID RasterizerSceneRD::render_buffers_get_gi_probe_buffer(RID p_render_buffers) { @@ -8402,7 +8334,7 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) { camera_effects_set_dof_blur_bokeh_shape(RS::DOFBokehShape(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_shape")))); camera_effects_set_dof_blur_quality(RS::DOFBlurQuality(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_quality"))), GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_use_jitter")); - environment_set_ssao_settings(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/ssao/quality"))), GLOBAL_GET("rendering/ssao/full_samples"), GLOBAL_GET("rendering/ssao/noise_tolerance"), GLOBAL_GET("rendering/ssao/blur_tolerance"), GLOBAL_GET("rendering/ssao/upsample_tolerance")); + environment_set_ssao_quality(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/quality/ssao/quality"))), GLOBAL_GET("rendering/quality/ssao/half_size")); screen_space_roughness_limiter = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_enabled"); screen_space_roughness_limiter_amount = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_amount"); screen_space_roughness_limiter_limit = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_limit"); diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h index 65f2a1d157..8a14598250 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h @@ -736,11 +736,13 @@ private: /// SSAO bool ssao_enabled = false; - float ssao_rejection_radius = 2.5; - float ssao_intensity = 1.0; - int ssao_levels = 3; + float ssao_radius = 1; + float ssao_intensity = 1; + float ssao_bias = 0.01; float ssao_direct_light_affect = 0.0; float ssao_ao_channel_affect = 0.0; + float ssao_blur_edge_sharpness = 4.0; + RS::EnvironmentSSAOBlur ssao_blur = RS::ENV_SSAO_BLUR_3x3; /// SSR /// @@ -764,11 +766,7 @@ private: }; RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; - bool ssao_full_samples = false; - float ssao_noise_tolerance = -3.0; - float ssao_blur_tolerance = -5.0; - float ssao_upsample_tolerance = -7.0; - + bool ssao_half_size = false; bool glow_bicubic_upscale = false; bool glow_high_quality = false; RS::EnvironmentSSRRoughnessQuality ssr_roughness_quality = RS::ENV_SSR_ROUGNESS_QUALITY_LOW; @@ -850,13 +848,10 @@ private: } luminance; struct SSAO { + RID depth; Vector<RID> depth_slices; - Vector<RID> depth_tiled_slices; - Vector<RID> filtered_ao_slices; - Vector<RID> ao_slices; - Vector<RID> high_quality_ao_slices; - RID linear_depth; - RID ao_full; + RID ao[2]; + RID ao_full; //when using half-size } ssao; struct SSR { @@ -1559,8 +1554,8 @@ public: virtual void environment_set_volumetric_fog_positional_shadow_shrink_size(int p_shrink_size); void environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance); - void environment_set_ssao(RID p_env, bool p_enable, float p_rejection_radius, float p_intensity, int p_levels, float p_light_affect, float p_ao_channel_affect); - void environment_set_ssao_settings(RS::EnvironmentSSAOQuality p_quality, bool p_full_samples, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance); + void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness); + void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size); bool environment_is_ssao_enabled(RID p_env) const; float environment_get_ssao_ao_affect(RID p_env) const; float environment_get_ssao_light_affect(RID p_env) const; diff --git a/servers/rendering/rasterizer_rd/shaders/SCsub b/servers/rendering/rasterizer_rd/shaders/SCsub index f9b8591307..9d531d63ad 100644 --- a/servers/rendering/rasterizer_rd/shaders/SCsub +++ b/servers/rendering/rasterizer_rd/shaders/SCsub @@ -19,10 +19,9 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("giprobe_sdf.glsl") env.RD_GLSL("luminance_reduce.glsl") env.RD_GLSL("bokeh_dof.glsl") - env.RD_GLSL("ssao_render.glsl") - env.RD_GLSL("ssao_downsample1.glsl") - env.RD_GLSL("ssao_downsample2.glsl") - env.RD_GLSL("ssao_upsample.glsl") + env.RD_GLSL("ssao.glsl") + env.RD_GLSL("ssao_minify.glsl") + env.RD_GLSL("ssao_blur.glsl") env.RD_GLSL("roughness_limiter.glsl") env.RD_GLSL("screen_space_reflection.glsl") env.RD_GLSL("screen_space_reflection_filter.glsl") diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl index 182d1cdfa6..455a3d4a3a 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl @@ -2717,7 +2717,7 @@ FRAGMENT_SHADER_CODE #if defined(AO_USED) if (scene_data.ssao_enabled && scene_data.ssao_ao_affect > 0.0) { - float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), gl_FragCoord.xy * scene_data.screen_pixel_size).r; + float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; ao = mix(ao, min(ao, ssao), scene_data.ssao_ao_affect); ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); } @@ -2729,7 +2729,7 @@ FRAGMENT_SHADER_CODE #else if (scene_data.ssao_enabled) { - float ao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), gl_FragCoord.xy * scene_data.screen_pixel_size).r; + float ao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao); float ao_light_affect = mix(1.0, ao, scene_data.ssao_light_affect); specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect); diff --git a/servers/rendering/rasterizer_rd/shaders/ssao.glsl b/servers/rendering/rasterizer_rd/shaders/ssao.glsl new file mode 100644 index 0000000000..346338181a --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/ssao.glsl @@ -0,0 +1,249 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define TWO_PI 6.283185307179586476925286766559 + +#ifdef SSAO_QUALITY_HIGH +#define NUM_SAMPLES (20) +#endif + +#ifdef SSAO_QUALITY_ULTRA +#define NUM_SAMPLES (48) +#endif + +#ifdef SSAO_QUALITY_LOW +#define NUM_SAMPLES (8) +#endif + +#if !defined(SSAO_QUALITY_LOW) && !defined(SSAO_QUALITY_HIGH) && !defined(SSAO_QUALITY_ULTRA) +#define NUM_SAMPLES (12) +#endif + +// If using depth mip levels, the log of the maximum pixel offset before we need to switch to a lower +// miplevel to maintain reasonable spatial locality in the cache +// If this number is too small (< 3), too many taps will land in the same pixel, and we'll get bad variance that manifests as flashing. +// If it is too high (> 5), we'll get bad performance because we're not using the MIP levels effectively +#define LOG_MAX_OFFSET (3) + +// This must be less than or equal to the MAX_MIP_LEVEL defined in SSAO.cpp +#define MAX_MIP_LEVEL (4) + +// This is the number of turns around the circle that the spiral pattern makes. This should be prime to prevent +// taps from lining up. This particular choice was tuned for NUM_SAMPLES == 9 + +const int ROTATIONS[] = int[]( + 1, 1, 2, 3, 2, 5, 2, 3, 2, + 3, 3, 5, 5, 3, 4, 7, 5, 5, 7, + 9, 8, 5, 5, 7, 7, 7, 8, 5, 8, + 11, 12, 7, 10, 13, 8, 11, 8, 7, 14, + 11, 11, 13, 12, 13, 19, 17, 13, 11, 18, + 19, 11, 11, 14, 17, 21, 15, 16, 17, 18, + 13, 17, 11, 17, 19, 18, 25, 18, 19, 19, + 29, 21, 19, 27, 31, 29, 21, 18, 17, 29, + 31, 31, 23, 18, 25, 26, 25, 23, 19, 34, + 19, 27, 21, 25, 39, 29, 17, 21, 27); + +//#define NUM_SPIRAL_TURNS (7) +const int NUM_SPIRAL_TURNS = ROTATIONS[NUM_SAMPLES - 1]; + +layout(set = 0, binding = 0) uniform sampler2D source_depth_mipmaps; +layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; + +#ifndef USE_HALF_SIZE +layout(set = 2, binding = 0) uniform sampler2D source_depth; +#endif + +layout(set = 3, binding = 0) uniform sampler2D source_normal; + +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 screen_size; + float z_far; + float z_near; + + bool orthogonal; + float intensity_div_r6; + float radius; + float bias; + + vec4 proj_info; + vec2 pixel_size; + float proj_scale; + uint pad; +} +params; + +vec3 reconstructCSPosition(vec2 S, float z) { + if (params.orthogonal) { + return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); + } else { + return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); + } +} + +vec3 getPosition(ivec2 ssP) { + vec3 P; +#ifdef USE_HALF_SIZE + P.z = texelFetch(source_depth_mipmaps, ssP, 0).r; + P.z = -P.z; +#else + P.z = texelFetch(source_depth, ssP, 0).r; + + P.z = P.z * 2.0 - 1.0; + if (params.orthogonal) { + P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near)); + } + P.z = -P.z; +#endif + // Offset to pixel center + P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z); + return P; +} + +/** Returns a unit vector and a screen-space radius for the tap on a unit disk (the caller should scale by the actual disk radius) */ +vec2 tapLocation(int sampleNumber, float spinAngle, out float ssR) { + // Radius relative to ssR + float alpha = (float(sampleNumber) + 0.5) * (1.0 / float(NUM_SAMPLES)); + float angle = alpha * (float(NUM_SPIRAL_TURNS) * 6.28) + spinAngle; + + ssR = alpha; + return vec2(cos(angle), sin(angle)); +} + +/** Read the camera-space position of the point at screen-space pixel ssP + unitOffset * ssR. Assumes length(unitOffset) == 1 */ +vec3 getOffsetPosition(ivec2 ssP, float ssR) { + // Derivation: + // mipLevel = floor(log(ssR / MAX_OFFSET)); + + int mipLevel = clamp(int(floor(log2(ssR))) - LOG_MAX_OFFSET, 0, MAX_MIP_LEVEL); + + vec3 P; + + // We need to divide by 2^mipLevel to read the appropriately scaled coordinate from a MIP-map. + // Manually clamp to the texture size because texelFetch bypasses the texture unit + ivec2 mipP = clamp(ssP >> mipLevel, ivec2(0), (params.screen_size >> mipLevel) - ivec2(1)); + +#ifdef USE_HALF_SIZE + P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel).r; + P.z = -P.z; +#else + if (mipLevel < 1) { + //read from depth buffer + P.z = texelFetch(source_depth, mipP, 0).r; + P.z = P.z * 2.0 - 1.0; + if (params.orthogonal) { + P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near)); + } + P.z = -P.z; + + } else { + //read from mipmaps + P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel - 1).r; + P.z = -P.z; + } +#endif + + // Offset to pixel center + P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z); + + return P; +} + +/** Compute the occlusion due to sample with index \a i about the pixel at \a ssC that corresponds + to camera-space point \a C with unit normal \a n_C, using maximum screen-space sampling radius \a ssDiskRadius + + Note that units of H() in the HPG12 paper are meters, not + unitless. The whole falloff/sampling function is therefore + unitless. In this implementation, we factor out (9 / radius). + + Four versions of the falloff function are implemented below +*/ +float sampleAO(in ivec2 ssC, in vec3 C, in vec3 n_C, in float ssDiskRadius, in float p_radius, in int tapIndex, in float randomPatternRotationAngle) { + // Offset on the unit disk, spun for this pixel + float ssR; + vec2 unitOffset = tapLocation(tapIndex, randomPatternRotationAngle, ssR); + ssR *= ssDiskRadius; + + ivec2 ssP = ivec2(ssR * unitOffset) + ssC; + + if (any(lessThan(ssP, ivec2(0))) || any(greaterThanEqual(ssP, params.screen_size))) { + return 0.0; + } + + // The occluding point in camera space + vec3 Q = getOffsetPosition(ssP, ssR); + + vec3 v = Q - C; + + float vv = dot(v, v); + float vn = dot(v, n_C); + + const float epsilon = 0.01; + float radius2 = p_radius * p_radius; + + // A: From the HPG12 paper + // Note large epsilon to avoid overdarkening within cracks + //return float(vv < radius2) * max((vn - bias) / (epsilon + vv), 0.0) * radius2 * 0.6; + + // B: Smoother transition to zero (lowers contrast, smoothing out corners). [Recommended] + float f = max(radius2 - vv, 0.0); + return f * f * f * max((vn - params.bias) / (epsilon + vv), 0.0); + + // C: Medium contrast (which looks better at high radii), no division. Note that the + // contribution still falls off with radius^2, but we've adjusted the rate in a way that is + // more computationally efficient and happens to be aesthetically pleasing. + // return 4.0 * max(1.0 - vv * invRadius2, 0.0) * max(vn - bias, 0.0); + + // D: Low contrast, no division operation + // return 2.0 * float(vv < radius * radius) * max(vn - bias, 0.0); +} + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + + // World space point being shaded + vec3 C = getPosition(ssC); + +#ifdef USE_HALF_SIZE + vec3 n_C = texelFetch(source_normal, ssC << 1, 0).xyz * 2.0 - 1.0; +#else + vec3 n_C = texelFetch(source_normal, ssC, 0).xyz * 2.0 - 1.0; +#endif + n_C = normalize(n_C); + n_C.y = -n_C.y; //because this code reads flipped + + // Hash function used in the HPG12 AlchemyAO paper + float randomPatternRotationAngle = mod(float((3 * ssC.x ^ ssC.y + ssC.x * ssC.y) * 10), TWO_PI); + + // Reconstruct normals from positions. These will lead to 1-pixel black lines + // at depth discontinuities, however the blur will wipe those out so they are not visible + // in the final image. + + // Choose the screen-space sample radius + // proportional to the projected area of the sphere + + float ssDiskRadius = -params.proj_scale * params.radius; + if (!params.orthogonal) { + ssDiskRadius = -params.proj_scale * params.radius / C.z; + } + float sum = 0.0; + for (int i = 0; i < NUM_SAMPLES; ++i) { + sum += sampleAO(ssC, C, n_C, ssDiskRadius, params.radius, i, randomPatternRotationAngle); + } + + float A = max(0.0, 1.0 - sum * params.intensity_div_r6 * (5.0 / float(NUM_SAMPLES))); + + imageStore(dest_image, ssC, vec4(A)); +} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl new file mode 100644 index 0000000000..3e63e3cb59 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl @@ -0,0 +1,153 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2D source_ssao; +layout(set = 1, binding = 0) uniform sampler2D source_depth; +#ifdef MODE_UPSCALE +layout(set = 2, binding = 0) uniform sampler2D source_depth_mipmaps; +#endif + +layout(r8, set = 3, binding = 0) uniform restrict writeonly image2D dest_image; + +////////////////////////////////////////////////////////////////////////////////////////////// +// Tunable Parameters: + +layout(push_constant, binding = 1, std430) uniform Params { + float edge_sharpness; /** Increase to make depth edges crisper. Decrease to reduce flicker. */ + int filter_scale; + float z_far; + float z_near; + bool orthogonal; + uint pad0; + uint pad1; + uint pad2; + ivec2 axis; /** (1, 0) or (0, 1) */ + ivec2 screen_size; +} +params; + +/** Filter radius in pixels. This will be multiplied by SCALE. */ +#define R (4) + +////////////////////////////////////////////////////////////////////////////////////////////// + +// Gaussian coefficients +const float gaussian[R + 1] = + //float[](0.356642, 0.239400, 0.072410, 0.009869); + //float[](0.398943, 0.241971, 0.053991, 0.004432, 0.000134); // stddev = 1.0 + float[](0.153170, 0.144893, 0.122649, 0.092902, 0.062970); // stddev = 2.0 +//float[](0.111220, 0.107798, 0.098151, 0.083953, 0.067458, 0.050920, 0.036108); // stddev = 3.0 + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + +#ifdef MODE_UPSCALE + + //closest one should be the same pixel, but check nearby just in case + float depth = texelFetch(source_depth, ssC, 0).r; + + depth = depth * 2.0 - 1.0; + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + + vec2 pixel_size = 1.0 / vec2(params.screen_size); + vec2 closest_uv = vec2(ssC) * pixel_size + pixel_size * 0.5; + vec2 from_uv = closest_uv; + vec2 ps2 = pixel_size; // * 2.0; + + float closest_depth = abs(textureLod(source_depth_mipmaps, closest_uv, 0.0).r - depth); + + vec2 offsets[4] = vec2[](vec2(ps2.x, 0), vec2(-ps2.x, 0), vec2(0, ps2.y), vec2(0, -ps2.y)); + for (int i = 0; i < 4; i++) { + vec2 neighbour = from_uv + offsets[i]; + float neighbour_depth = abs(textureLod(source_depth_mipmaps, neighbour, 0.0).r - depth); + if (neighbour_depth < closest_depth) { + closest_uv = neighbour; + closest_depth = neighbour_depth; + } + } + + float visibility = textureLod(source_ssao, closest_uv, 0.0).r; + imageStore(dest_image, ssC, vec4(visibility)); +#else + + float depth = texelFetch(source_depth, ssC, 0).r; + +#ifdef MODE_FULL_SIZE + depth = depth * 2.0 - 1.0; + + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + +#endif + float depth_divide = 1.0 / params.z_far; + + //depth *= depth_divide; + + /* + if (depth > params.z_far * 0.999) { + discard; //skybox + } + */ + + float sum = texelFetch(source_ssao, ssC, 0).r; + + // Base weight for depth falloff. Increase this for more blurriness, + // decrease it for better edge discrimination + float BASE = gaussian[0]; + float totalWeight = BASE; + sum *= totalWeight; + + ivec2 clamp_limit = params.screen_size - ivec2(1); + + for (int r = -R; r <= R; ++r) { + // We already handled the zero case above. This loop should be unrolled and the static branch optimized out, + // so the IF statement has no runtime cost + if (r != 0) { + ivec2 ppos = ssC + params.axis * (r * params.filter_scale); + float value = texelFetch(source_ssao, clamp(ppos, ivec2(0), clamp_limit), 0).r; + ivec2 rpos = clamp(ppos, ivec2(0), clamp_limit); + + float temp_depth = texelFetch(source_depth, rpos, 0).r; +#ifdef MODE_FULL_SIZE + temp_depth = temp_depth * 2.0 - 1.0; + if (params.orthogonal) { + temp_depth = ((temp_depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + temp_depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - temp_depth * (params.z_far - params.z_near)); + } + //temp_depth *= depth_divide; +#endif + // spatial domain: offset gaussian tap + float weight = 0.3 + gaussian[abs(r)]; + //weight *= max(0.0, dot(temp_normal, normal)); + + // range domain (the "bilateral" weight). As depth difference increases, decrease weight. + weight *= max(0.0, 1.0 - params.edge_sharpness * abs(temp_depth - depth)); + + sum += value * weight; + totalWeight += weight; + } + } + + const float epsilon = 0.0001; + float visibility = sum / (totalWeight + epsilon); + + imageStore(dest_image, ssC, vec4(visibility)); +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl deleted file mode 100644 index 3bfce1a827..0000000000 --- a/servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl +++ /dev/null @@ -1,77 +0,0 @@ -// -// Copyright (c) Microsoft. All rights reserved. -// This code is licensed under the MIT License (MIT). -// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF -// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY -// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR -// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. -// -// Developed by Minigraph -// -// Author: James Stanard -// - -#[compute] - -#version 450 - -VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(push_constant, binding = 1, std430) uniform Params { - float z_far; - float z_near; - bool orthogonal; - uint pad; -} -params; - -layout(set = 0, binding = 0) uniform sampler2D source_depth; - -layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2D linear_z; -layout(r32f, set = 2, binding = 0) uniform restrict writeonly image2D downsampled2x; -layout(r16f, set = 3, binding = 0) uniform restrict writeonly image2DArray downsampled2x_atlas; -layout(r32f, set = 4, binding = 0) uniform restrict writeonly image2D downsampled4x; -layout(r16f, set = 5, binding = 0) uniform restrict writeonly image2DArray downsampled4x_atlas; - -float Linearize(uvec2 p_pos) { - float depth = texelFetch(source_depth, ivec2(p_pos), 0).r * 2.0 - 1.0; - if (params.orthogonal) { - depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / (2.0 * params.z_far); - } else { - depth = 2.0 * params.z_near / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); - } - imageStore(linear_z, ivec2(p_pos), vec4(depth)); - return depth; -} - -shared float local_cache[256]; - -void main() { - uvec2 start = gl_WorkGroupID.xy << 4 | gl_LocalInvocationID.xy; - uint dest_index = gl_LocalInvocationID.y << 4 | gl_LocalInvocationID.x; - local_cache[dest_index + 0] = Linearize(start | uvec2(0, 0)); - local_cache[dest_index + 8] = Linearize(start | uvec2(8, 0)); - local_cache[dest_index + 128] = Linearize(start | uvec2(0, 8)); - local_cache[dest_index + 136] = Linearize(start | uvec2(8, 8)); - - groupMemoryBarrier(); - barrier(); - - uint index = (gl_LocalInvocationID.x << 1) | (gl_LocalInvocationID.y << 5); - - float w1 = local_cache[index]; - - uvec2 pos = gl_GlobalInvocationID.xy; - uint slice = (pos.x & 3) | ((pos.y & 3) << 2); - imageStore(downsampled2x, ivec2(pos), vec4(w1)); - imageStore(downsampled2x_atlas, ivec3(pos >> 2, slice), vec4(w1)); - - if ((gl_LocalInvocationIndex & 011) == 0) { - pos = gl_GlobalInvocationID.xy >> 1; - slice = (pos.x & 3) | ((pos.y & 3) << 2); - imageStore(downsampled4x, ivec2(pos), vec4(w1)); - imageStore(downsampled4x_atlas, ivec3(pos >> 2, slice), vec4(w1)); - } -} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl deleted file mode 100644 index 9fec881057..0000000000 --- a/servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright (c) Microsoft. All rights reserved. -// This code is licensed under the MIT License (MIT). -// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF -// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY -// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR -// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. -// -// Developed by Minigraph -// -// Author: James Stanard -// - -#[compute] - -#version 450 - -VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D downsampled4x; -layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D downsampled8x; -layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray downsampled8x_atlas; -layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D downsampled16x; -layout(r16f, set = 4, binding = 0) uniform restrict writeonly image2DArray downsampled16x_atlas; - -void main() { - vec4 w1 = imageLoad(downsampled4x, min(ivec2(gl_GlobalInvocationID.xy << 1), imageSize(downsampled4x) - ivec2(2))); - - uvec2 pos = gl_GlobalInvocationID.xy; - uvec2 pos_atlas = pos >> 2; - uint pos_slice = (pos.x & 3) | ((pos.y & 3) << 2); - ivec2 ds8s = imageSize(downsampled8x); - - if (pos.x < ds8s.x && pos.y < ds8s.y) { - imageStore(downsampled8x, ivec2(pos), w1); - } - - imageStore(downsampled8x_atlas, ivec3(pos_atlas, pos_slice), w1); - - if ((gl_LocalInvocationIndex & 011) == 0) { - uvec2 pos = gl_GlobalInvocationID.xy >> 1; - uvec2 pos_atlas = pos >> 2; - uint pos_slice = (pos.x & 3) | ((pos.y & 3) << 2); - imageStore(downsampled16x, ivec2(pos), w1); - imageStore(downsampled16x_atlas, ivec3(pos_atlas, pos_slice), w1); - } -} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl new file mode 100644 index 0000000000..263fca386f --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl @@ -0,0 +1,45 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 1, std430) uniform Params { + vec2 pixel_size; + float z_far; + float z_near; + ivec2 source_size; + bool orthogonal; + uint pad; +} +params; + +#ifdef MINIFY_START +layout(set = 0, binding = 0) uniform sampler2D source_texture; +#else +layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_image; +#endif +layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThan(pos, params.source_size >> 1))) { //too large, do nothing + return; + } + +#ifdef MINIFY_START + float depth = texelFetch(source_texture, pos << 1, 0).r * 2.0 - 1.0; + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } +#else + float depth = imageLoad(source_image, pos << 1).r; +#endif + + imageStore(dest_image, pos, vec4(depth)); +} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_render.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_render.glsl deleted file mode 100644 index 42eb49c9fe..0000000000 --- a/servers/rendering/rasterizer_rd/shaders/ssao_render.glsl +++ /dev/null @@ -1,159 +0,0 @@ -// -// Copyright (c) Microsoft. All rights reserved. -// This code is licensed under the MIT License (MIT). -// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF -// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY -// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR -// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. -// -// Developed by Minigraph -// -// Author: James Stanard -// - -#[compute] - -#version 450 - -VERSION_DEFINES - -#ifndef INTERLEAVE_RESULT -#define WIDE_SAMPLING 1 -#endif - -#if WIDE_SAMPLING -// 32x32 cache size: the 16x16 in the center forms the area of focus with the 8-pixel perimeter used for wide gathering. -#define TILE_DIM 32 -layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; -#else -// 16x16 cache size: the 8x8 in the center forms the area of focus with the 4-pixel perimeter used for gathering. -#define TILE_DIM 16 -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; -#endif - -#ifdef INTERLEAVE_RESULT -layout(set = 0, binding = 0) uniform sampler2DArray depth_texture; -#else -layout(set = 0, binding = 0) uniform sampler2D depth_texture; -#endif - -layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D occlusion; -//SamplerState LinearBorderSampler : register(s1); - -layout(push_constant, binding = 1, std430) uniform Params { - vec4 inv_thickness_table[3]; - vec4 sample_weight_table[3]; - vec2 texel_size; - float rejection_fadeoff; - float intensity; -} -params; - -shared float depth_samples[TILE_DIM * TILE_DIM]; - -float test_sample_pair(float front_depth, float inv_range, uint p_base, uint p_offset) { - // "Disocclusion" measures the penetration distance of the depth sample within the sphere. - // Disocclusion < 0 (full occlusion) -> the sample fell in front of the sphere - // Disocclusion > 1 (no occlusion) -> the sample fell behind the sphere - float disocclusion1 = depth_samples[p_base + p_offset] * inv_range - front_depth; - float disocclusion2 = depth_samples[p_base - p_offset] * inv_range - front_depth; - - float pseudo_disocclusion1 = clamp(params.rejection_fadeoff * disocclusion1, 0.0, 1.0); - float pseudo_disocclusion2 = clamp(params.rejection_fadeoff * disocclusion2, 0.0, 1.0); - - return clamp(disocclusion1, pseudo_disocclusion2, 1.0) + - clamp(disocclusion2, pseudo_disocclusion1, 1.0) - - pseudo_disocclusion1 * pseudo_disocclusion2; -} - -float test_samples(uint p_center_index, uint p_x, uint p_y, float p_inv_depth, float p_inv_thickness) { -#if WIDE_SAMPLING - p_x <<= 1; - p_y <<= 1; -#endif - - float inv_range = p_inv_thickness * p_inv_depth; - float front_depth = p_inv_thickness - 0.5; - - if (p_y == 0) { - // Axial - return 0.5 * (test_sample_pair(front_depth, inv_range, p_center_index, p_x) + - test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM)); - } else if (p_x == p_y) { - // Diagonal - return 0.5 * (test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM - p_x) + - test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM + p_x)); - } else { - // L-Shaped - return 0.25 * (test_sample_pair(front_depth, inv_range, p_center_index, p_y * TILE_DIM + p_x) + - test_sample_pair(front_depth, inv_range, p_center_index, p_y * TILE_DIM - p_x) + - test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM + p_y) + - test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM - p_y)); - } -} - -void main() { -#if WIDE_SAMPLING - vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 7.5) * params.texel_size, vec2(params.texel_size * 0.5), vec2(1.0 - params.texel_size * 0.5)); -#else - vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) * params.texel_size, vec2(params.texel_size * 0.5), vec2(1.0 - params.texel_size * 0.5)); -#endif - - // Fetch four depths and store them in LDS -#ifdef INTERLEAVE_RESULT - vec4 depths = textureGather(depth_texture, vec3(quad_center_uv, gl_GlobalInvocationID.z)); // textureGather -#else - vec4 depths = textureGather(depth_texture, quad_center_uv); -#endif - - uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * TILE_DIM; - depth_samples[dest_index] = depths.w; - depth_samples[dest_index + 1] = depths.z; - depth_samples[dest_index + TILE_DIM] = depths.x; - depth_samples[dest_index + TILE_DIM + 1] = depths.y; - - groupMemoryBarrier(); - barrier(); - -#if WIDE_SAMPLING - uint index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * TILE_DIM + 8 * TILE_DIM + 8; -#else - uint index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * TILE_DIM + 4 * TILE_DIM + 4; -#endif - const float inv_depth = 1.0 / depth_samples[index]; - - float ao = 0.0; - - if (params.sample_weight_table[0].x > 0.0) { - // 68 samples: sample all cells in *within* a circular radius of 5 - ao += params.sample_weight_table[0].x * test_samples(index, 1, 0, inv_depth, params.inv_thickness_table[0].x); - ao += params.sample_weight_table[0].y * test_samples(index, 2, 0, inv_depth, params.inv_thickness_table[0].y); - ao += params.sample_weight_table[0].z * test_samples(index, 3, 0, inv_depth, params.inv_thickness_table[0].z); - ao += params.sample_weight_table[0].w * test_samples(index, 4, 0, inv_depth, params.inv_thickness_table[0].w); - ao += params.sample_weight_table[1].x * test_samples(index, 1, 1, inv_depth, params.inv_thickness_table[1].x); - ao += params.sample_weight_table[2].x * test_samples(index, 2, 2, inv_depth, params.inv_thickness_table[2].x); - ao += params.sample_weight_table[2].w * test_samples(index, 3, 3, inv_depth, params.inv_thickness_table[2].w); - ao += params.sample_weight_table[1].y * test_samples(index, 1, 2, inv_depth, params.inv_thickness_table[1].y); - ao += params.sample_weight_table[1].z * test_samples(index, 1, 3, inv_depth, params.inv_thickness_table[1].z); - ao += params.sample_weight_table[1].w * test_samples(index, 1, 4, inv_depth, params.inv_thickness_table[1].w); - ao += params.sample_weight_table[2].y * test_samples(index, 2, 3, inv_depth, params.inv_thickness_table[2].y); - ao += params.sample_weight_table[2].z * test_samples(index, 2, 4, inv_depth, params.inv_thickness_table[2].z); - } else { - // SAMPLE_CHECKER - // 36 samples: sample every-other cell in a checker board pattern - ao += params.sample_weight_table[0].y * test_samples(index, 2, 0, inv_depth, params.inv_thickness_table[0].y); - ao += params.sample_weight_table[0].w * test_samples(index, 4, 0, inv_depth, params.inv_thickness_table[0].w); - ao += params.sample_weight_table[1].x * test_samples(index, 1, 1, inv_depth, params.inv_thickness_table[1].x); - ao += params.sample_weight_table[2].x * test_samples(index, 2, 2, inv_depth, params.inv_thickness_table[2].x); - ao += params.sample_weight_table[2].w * test_samples(index, 3, 3, inv_depth, params.inv_thickness_table[2].w); - ao += params.sample_weight_table[1].z * test_samples(index, 1, 3, inv_depth, params.inv_thickness_table[1].z); - ao += params.sample_weight_table[2].z * test_samples(index, 2, 4, inv_depth, params.inv_thickness_table[2].z); - } - -#ifdef INTERLEAVE_RESULT - uvec2 out_pixel = gl_GlobalInvocationID.xy << 2 | uvec2(gl_GlobalInvocationID.z & 3, gl_GlobalInvocationID.z >> 2); -#else - uvec2 out_pixel = gl_GlobalInvocationID.xy; -#endif - imageStore(occlusion, ivec2(out_pixel), vec4(mix(1.0, ao, params.intensity))); -} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl deleted file mode 100644 index e91e4a9bd8..0000000000 --- a/servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl +++ /dev/null @@ -1,216 +0,0 @@ -// -// Copyright (c) Microsoft. All rights reserved. -// This code is licensed under the MIT License (MIT). -// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF -// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY -// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR -// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. -// -// Developed by Minigraph -// -// Author: James Stanard -// - -#[compute] - -#version 450 - -VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(set = 0, binding = 0) uniform sampler2D low_res_depth; -layout(set = 1, binding = 0) uniform sampler2D high_res_depth; -layout(set = 2, binding = 0) uniform sampler2D low_res_ao1; -layout(r8, set = 3, binding = 0) uniform restrict writeonly image2D ao_result; -#ifdef COMBINE_LOWER_RESOLUTIONS -layout(set = 4, binding = 0) uniform sampler2D low_res_ao2; -#endif -#ifdef BLEND_WITH_HIGHER_RESOLUTION -layout(set = 5, binding = 0) uniform sampler2D high_res_ao; -#endif - -//SamplerState LinearSampler : register(s0); - -layout(push_constant, binding = 1, std430) uniform Params { - vec2 inv_low_resolution; - vec2 inv_high_resolution; - float noise_filter_strength; - float step_size; - float blur_tolerance; - float upsample_tolerance; -} -params; - -shared float depth_cache[256]; -shared float ao_cache1[256]; -shared float ao_cache2[256]; - -void prefetch_data(uint p_index, vec2 p_uv) { - vec4 ao1 = textureGather(low_res_ao1, p_uv); // textureGather - -#ifdef COMBINE_LOWER_RESOLUTIONS - ao1 = min(ao1, textureGather(low_res_ao2, p_uv)); -#endif - - ao_cache1[p_index] = ao1.w; - ao_cache1[p_index + 1] = ao1.z; - ao_cache1[p_index + 16] = ao1.x; - ao_cache1[p_index + 17] = ao1.y; - - vec4 ID = 1.0 / textureGather(low_res_depth, p_uv); - depth_cache[p_index] = ID.w; - depth_cache[p_index + 1] = ID.z; - depth_cache[p_index + 16] = ID.x; - depth_cache[p_index + 17] = ID.y; -} - -float smart_blur(float p_a, float p_b, float p_c, float p_d, float p_e, bool p_left, bool p_middle, bool p_right) { - p_b = p_left || p_middle ? p_b : p_c; - p_a = p_left ? p_a : p_b; - p_d = p_right || p_middle ? p_d : p_c; - p_e = p_right ? p_e : p_d; - return ((p_a + p_e) / 2.0 + p_b + p_c + p_d) / 4.0; -} - -bool compare_deltas(float p_d1, float p_d2, float p_l1, float p_l2) { - float temp = p_d1 * p_d2 + params.step_size; - return temp * temp > p_l1 * p_l2 * params.blur_tolerance; -} - -void blur_horizontally(uint p_left_most_index) { - float a0 = ao_cache1[p_left_most_index]; - float a1 = ao_cache1[p_left_most_index + 1]; - float a2 = ao_cache1[p_left_most_index + 2]; - float a3 = ao_cache1[p_left_most_index + 3]; - float a4 = ao_cache1[p_left_most_index + 4]; - float a5 = ao_cache1[p_left_most_index + 5]; - float a6 = ao_cache1[p_left_most_index + 6]; - - float d0 = depth_cache[p_left_most_index]; - float d1 = depth_cache[p_left_most_index + 1]; - float d2 = depth_cache[p_left_most_index + 2]; - float d3 = depth_cache[p_left_most_index + 3]; - float d4 = depth_cache[p_left_most_index + 4]; - float d5 = depth_cache[p_left_most_index + 5]; - float d6 = depth_cache[p_left_most_index + 6]; - - float d01 = d1 - d0; - float d12 = d2 - d1; - float d23 = d3 - d2; - float d34 = d4 - d3; - float d45 = d5 - d4; - float d56 = d6 - d5; - - float l01 = d01 * d01 + params.step_size; - float l12 = d12 * d12 + params.step_size; - float l23 = d23 * d23 + params.step_size; - float l34 = d34 * d34 + params.step_size; - float l45 = d45 * d45 + params.step_size; - float l56 = d56 * d56 + params.step_size; - - bool c02 = compare_deltas(d01, d12, l01, l12); - bool c13 = compare_deltas(d12, d23, l12, l23); - bool c24 = compare_deltas(d23, d34, l23, l34); - bool c35 = compare_deltas(d34, d45, l34, l45); - bool c46 = compare_deltas(d45, d56, l45, l56); - - ao_cache2[p_left_most_index] = smart_blur(a0, a1, a2, a3, a4, c02, c13, c24); - ao_cache2[p_left_most_index + 1] = smart_blur(a1, a2, a3, a4, a5, c13, c24, c35); - ao_cache2[p_left_most_index + 2] = smart_blur(a2, a3, a4, a5, a6, c24, c35, c46); -} - -void blur_vertically(uint p_top_most_index) { - float a0 = ao_cache2[p_top_most_index]; - float a1 = ao_cache2[p_top_most_index + 16]; - float a2 = ao_cache2[p_top_most_index + 32]; - float a3 = ao_cache2[p_top_most_index + 48]; - float a4 = ao_cache2[p_top_most_index + 64]; - float a5 = ao_cache2[p_top_most_index + 80]; - - float d0 = depth_cache[p_top_most_index + 2]; - float d1 = depth_cache[p_top_most_index + 18]; - float d2 = depth_cache[p_top_most_index + 34]; - float d3 = depth_cache[p_top_most_index + 50]; - float d4 = depth_cache[p_top_most_index + 66]; - float d5 = depth_cache[p_top_most_index + 82]; - - float d01 = d1 - d0; - float d12 = d2 - d1; - float d23 = d3 - d2; - float d34 = d4 - d3; - float d45 = d5 - d4; - - float l01 = d01 * d01 + params.step_size; - float l12 = d12 * d12 + params.step_size; - float l23 = d23 * d23 + params.step_size; - float l34 = d34 * d34 + params.step_size; - float l45 = d45 * d45 + params.step_size; - - bool c02 = compare_deltas(d01, d12, l01, l12); - bool c13 = compare_deltas(d12, d23, l12, l23); - bool c24 = compare_deltas(d23, d34, l23, l34); - bool c35 = compare_deltas(d34, d45, l34, l45); - - float ao_result1 = smart_blur(a0, a1, a2, a3, a4, c02, c13, c24); - float ao_result2 = smart_blur(a1, a2, a3, a4, a5, c13, c24, c35); - - ao_cache1[p_top_most_index] = ao_result1; - ao_cache1[p_top_most_index + 16] = ao_result2; -} - -// We essentially want 5 weights: 4 for each low-res pixel and 1 to blend in when none of the 4 really -// match. The filter strength is 1 / DeltaZTolerance. So a tolerance of 0.01 would yield a strength of 100. -// Note that a perfect match of low to high depths would yield a weight of 10^6, completely superceding any -// noise filtering. The noise filter is intended to soften the effects of shimmering when the high-res depth -// buffer has a lot of small holes in it causing the low-res depth buffer to inaccurately represent it. -float bilateral_upsample(float p_high_depth, float p_high_ao, vec4 p_low_depths, vec4 p_low_ao) { - vec4 weights = vec4(9.0, 3.0, 1.0, 3.0) / (abs(p_high_depth - p_low_depths) + params.upsample_tolerance); - float total_weight = dot(weights, vec4(1.0)) + params.noise_filter_strength; - float weighted_sum = dot(p_low_ao, weights) + params.noise_filter_strength; - return p_high_ao * weighted_sum / total_weight; -} - -void main() { - // Load 4 pixels per thread into LDS to fill the 16x16 LDS cache with depth and AO - prefetch_data(gl_LocalInvocationID.x << 1 | gl_LocalInvocationID.y << 5, vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 2.5) * params.inv_low_resolution); - groupMemoryBarrier(); - barrier(); - - // Goal: End up with a 9x9 patch that is blurred so we can upsample. Blur radius is 2 pixels, so start with 13x13 area. - - // Horizontally blur the pixels. 13x13 -> 9x13 - if (gl_LocalInvocationIndex < 39) - blur_horizontally((gl_LocalInvocationIndex / 3) * 16 + (gl_LocalInvocationIndex % 3) * 3); - groupMemoryBarrier(); - barrier(); - - // Vertically blur the pixels. 9x13 -> 9x9 - if (gl_LocalInvocationIndex < 45) - blur_vertically((gl_LocalInvocationIndex / 9) * 32 + gl_LocalInvocationIndex % 9); - groupMemoryBarrier(); - barrier(); - - // Bilateral upsample - uint index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 16; - vec4 low_SSAOs = vec4(ao_cache1[index + 16], ao_cache1[index + 17], ao_cache1[index + 1], ao_cache1[index]); - - // We work on a quad of pixels at once because then we can gather 4 each of high and low-res depth values - vec2 UV0 = (gl_GlobalInvocationID.xy - 0.5) * params.inv_low_resolution; - vec2 UV1 = (gl_GlobalInvocationID.xy * 2.0 - 0.5) * params.inv_high_resolution; - -#ifdef BLEND_WITH_HIGHER_RESOLUTION - vec4 hi_SSAOs = textureGather(high_res_ao, UV1); -#else - vec4 hi_SSAOs = vec4(1.0); -#endif - vec4 Low_depths = textureGather(low_res_depth, UV0); - vec4 high_depths = textureGather(high_res_depth, UV1); - - ivec2 OutST = ivec2(gl_GlobalInvocationID.xy << 1); - - imageStore(ao_result, OutST + ivec2(-1, 0), vec4(bilateral_upsample(high_depths.x, hi_SSAOs.x, Low_depths.xyzw, low_SSAOs.xyzw))); - imageStore(ao_result, OutST + ivec2(0, 0), vec4(bilateral_upsample(high_depths.y, hi_SSAOs.y, Low_depths.yzwx, low_SSAOs.yzwx))); - imageStore(ao_result, OutST + ivec2(0, -1), vec4(bilateral_upsample(high_depths.z, hi_SSAOs.z, Low_depths.zwxy, low_SSAOs.zwxy))); - imageStore(ao_result, OutST + ivec2(-1, -1), vec4(bilateral_upsample(high_depths.w, hi_SSAOs.w, Low_depths.wxyz, low_SSAOs.wxyz))); -} diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 2573e6e6dd..6df66e7b20 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -433,7 +433,6 @@ public: TEXTURE_SLICE_2D, TEXTURE_SLICE_CUBEMAP, TEXTURE_SLICE_3D, - TEXTURE_SLICE_2D_ARRAY, }; virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, TextureSliceType p_slice_type = TEXTURE_SLICE_2D) = 0; diff --git a/servers/rendering/rendering_server_raster.h b/servers/rendering/rendering_server_raster.h index 1bafe7a91d..fb3baeca95 100644 --- a/servers/rendering/rendering_server_raster.h +++ b/servers/rendering/rendering_server_raster.h @@ -578,8 +578,8 @@ public: BIND6(environment_set_ssr, RID, bool, int, float, float, float) BIND1(environment_set_ssr_roughness_quality, EnvironmentSSRRoughnessQuality) - BIND7(environment_set_ssao, RID, bool, float, float, int, float, float) - BIND5(environment_set_ssao_settings, EnvironmentSSAOQuality, bool, float, float, float) + BIND9(environment_set_ssao, RID, bool, float, float, float, float, float, EnvironmentSSAOBlur, float) + BIND2(environment_set_ssao_quality, EnvironmentSSAOQuality, bool) BIND11(environment_set_glow, RID, bool, Vector<float>, float, float, float, float, EnvironmentGlowBlendMode, float, float, float) BIND1(environment_glow_set_use_bicubic_upscale, bool) diff --git a/servers/rendering/rendering_server_wrap_mt.h b/servers/rendering/rendering_server_wrap_mt.h index 3c2b0b9475..305a3aaee7 100644 --- a/servers/rendering/rendering_server_wrap_mt.h +++ b/servers/rendering/rendering_server_wrap_mt.h @@ -484,9 +484,9 @@ public: FUNC6(environment_set_ssr, RID, bool, int, float, float, float) FUNC1(environment_set_ssr_roughness_quality, EnvironmentSSRRoughnessQuality) - FUNC7(environment_set_ssao, RID, bool, float, float, int, float, float) + FUNC9(environment_set_ssao, RID, bool, float, float, float, float, float, EnvironmentSSAOBlur, float) - FUNC5(environment_set_ssao_settings, EnvironmentSSAOQuality, bool, float, float, float) + FUNC2(environment_set_ssao_quality, EnvironmentSSAOQuality, bool) FUNC11(environment_set_sdfgi, RID, bool, EnvironmentSDFGICascades, float, EnvironmentSDFGIYScale, bool, bool, bool, float, float, float) FUNC1(environment_set_sdfgi_ray_count, EnvironmentSDFGIRayCount) |