diff options
author | clayjohn <claynjohn@gmail.com> | 2020-09-14 23:47:07 -0700 |
---|---|---|
committer | clayjohn <claynjohn@gmail.com> | 2020-10-18 13:15:51 -0700 |
commit | 366ee4677487223015f1f7f92bf955452a5cf83f (patch) | |
tree | b58a460a385cd3b4ec1c3fb6a6e8d2071a184b37 /servers/rendering | |
parent | 4467412c9f78f3ddaf7edd4627ec7d629a6234df (diff) |
Replace SAO implementation with MSSAO
Diffstat (limited to 'servers/rendering')
17 files changed, 1027 insertions, 716 deletions
diff --git a/servers/rendering/rasterizer.h b/servers/rendering/rasterizer.h index cecd2f2212..2919ef07c8 100644 --- a/servers/rendering/rasterizer.h +++ b/servers/rendering/rasterizer.h @@ -98,9 +98,9 @@ public: virtual void environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance) = 0; virtual void environment_set_ssr_roughness_quality(RS::EnvironmentSSRRoughnessQuality p_quality) = 0; - virtual void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness) = 0; + virtual void environment_set_ssao(RID p_env, bool p_enable, float p_rejection_radius, float p_intensity, int p_levels, float p_light_affect, float p_ao_channel_affect) = 0; - virtual void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size) = 0; + virtual void environment_set_ssao_settings(RS::EnvironmentSSAOQuality p_quality, bool p_full_samples, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance) = 0; virtual void environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, float p_energy, float p_normal_bias, float p_probe_bias) = 0; diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp index 71acd4ceb6..cfcd2e4534 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp @@ -926,154 +926,330 @@ void RasterizerEffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, con RD::get_singleton()->compute_list_end(); } -void RasterizerEffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, const Size2i &p_depth_buffer_size, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao1, bool p_half_size, RID p_ao2, RID p_upscale_buffer, float p_intensity, float p_radius, float p_bias, const CameraMatrix &p_projection, RS::EnvironmentSSAOQuality p_quality, RS::EnvironmentSSAOBlur p_blur, float p_edge_sharpness) { - //minify first - ssao.minify_push_constant.orthogonal = p_projection.is_orthogonal(); - ssao.minify_push_constant.z_near = p_projection.get_z_near(); - ssao.minify_push_constant.z_far = p_projection.get_z_far(); - ssao.minify_push_constant.pixel_size[0] = 1.0 / p_depth_buffer_size.x; - ssao.minify_push_constant.pixel_size[1] = 1.0 / p_depth_buffer_size.y; - ssao.minify_push_constant.source_size[0] = p_depth_buffer_size.x; - ssao.minify_push_constant.source_size[1] = p_depth_buffer_size.y; +void RasterizerEffectsRD::_compute_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_depth_buffer, const float p_tan_half_fov_h, int p_width, int p_height, int p_depth, float p_rejection_radius, float p_intensity, bool p_use_full_samples) { + const float sample_thickness[12] = { + Math::sqrt(1.0f - 0.2f * 0.2f), + Math::sqrt(1.0f - 0.4f * 0.4f), + Math::sqrt(1.0f - 0.6f * 0.6f), + Math::sqrt(1.0f - 0.8f * 0.8f), + Math::sqrt(1.0f - 0.2f * 0.2f - 0.2f * 0.2f), + Math::sqrt(1.0f - 0.2f * 0.2f - 0.4f * 0.4f), + Math::sqrt(1.0f - 0.2f * 0.2f - 0.6f * 0.6f), + Math::sqrt(1.0f - 0.2f * 0.2f - 0.8f * 0.8f), + Math::sqrt(1.0f - 0.4f * 0.4f - 0.4f * 0.4f), + Math::sqrt(1.0f - 0.4f * 0.4f - 0.6f * 0.6f), + Math::sqrt(1.0f - 0.4f * 0.4f - 0.8f * 0.8f), + Math::sqrt(1.0f - 0.6f * 0.6f - 0.6f * 0.6f) + }; + + // Here we compute multipliers that convert the center depth value into (the reciprocal of) + // sphere thicknesses at each sample location. This assumes a maximum sample radius of 5 + // units, but since a sphere has no thickness at its extent, we don't need to sample that far + // out. Only samples whole integer offsets with distance less than 25 are used. This means + // that there is no sample at (3, 4) because its distance is exactly 25 (and has a thickness of 0.) + + // The shaders are set up to sample a circular region within a 5-pixel radius. + const float screenspace_diameter = 10.0f; + + // SphereDiameter = CenterDepth * thickness_multiplier. This will compute the thickness of a sphere centered + // at a specific depth. The ellipsoid scale can stretch a sphere into an ellipsoid, which changes the + // characteristics of the AO. + // tan_half_fov_h: Radius of sphere in depth units if its center lies at Z = 1 + // screenspace_diameter: Diameter of sample sphere in pixel units + // screenspace_diameter / p_width: Ratio of the screen width that the sphere actually covers + // Note about the "2.0f * ": Diameter = 2 * Radius + float thickness_multiplier = 2.0f * p_tan_half_fov_h * screenspace_diameter / p_width; + + if (p_depth == 1) { + thickness_multiplier *= 2.0f; + } + + // This will transform a depth value from [0, thickness] to [0, 1]. + float inverse_range_factor = 1.0f / thickness_multiplier; + + // The thicknesses are smaller for all off-center samples of the sphere. Compute thicknesses relative + // to the center sample. + ssao.ssao_render_push_constant.inv_thickness_table[0] = inverse_range_factor / sample_thickness[0]; + ssao.ssao_render_push_constant.inv_thickness_table[1] = inverse_range_factor / sample_thickness[1]; + ssao.ssao_render_push_constant.inv_thickness_table[2] = inverse_range_factor / sample_thickness[2]; + ssao.ssao_render_push_constant.inv_thickness_table[3] = inverse_range_factor / sample_thickness[3]; + ssao.ssao_render_push_constant.inv_thickness_table[4] = inverse_range_factor / sample_thickness[4]; + ssao.ssao_render_push_constant.inv_thickness_table[5] = inverse_range_factor / sample_thickness[5]; + ssao.ssao_render_push_constant.inv_thickness_table[6] = inverse_range_factor / sample_thickness[6]; + ssao.ssao_render_push_constant.inv_thickness_table[7] = inverse_range_factor / sample_thickness[7]; + ssao.ssao_render_push_constant.inv_thickness_table[8] = inverse_range_factor / sample_thickness[8]; + ssao.ssao_render_push_constant.inv_thickness_table[9] = inverse_range_factor / sample_thickness[9]; + ssao.ssao_render_push_constant.inv_thickness_table[10] = inverse_range_factor / sample_thickness[10]; + ssao.ssao_render_push_constant.inv_thickness_table[11] = inverse_range_factor / sample_thickness[11]; + + // These are the weights that are multiplied against the samples because not all samples are + // equally important. The farther the sample is from the center location, the less they matter. + // We use the thickness of the sphere to determine the weight. The scalars in front are the number + // of samples with this weight because we sum the samples together before multiplying by the weight, + // so as an aggregate all of those samples matter more. After generating this table, the weights + // are normalized. + ssao.ssao_render_push_constant.sample_weight_table[0] = 4.0f * sample_thickness[0]; // Axial + ssao.ssao_render_push_constant.sample_weight_table[1] = 4.0f * sample_thickness[1]; // Axial + ssao.ssao_render_push_constant.sample_weight_table[2] = 4.0f * sample_thickness[2]; // Axial + ssao.ssao_render_push_constant.sample_weight_table[3] = 4.0f * sample_thickness[3]; // Axial + ssao.ssao_render_push_constant.sample_weight_table[4] = 4.0f * sample_thickness[4]; // Diagonal + ssao.ssao_render_push_constant.sample_weight_table[5] = 8.0f * sample_thickness[5]; // L-shaped + ssao.ssao_render_push_constant.sample_weight_table[6] = 8.0f * sample_thickness[6]; // L-shaped + ssao.ssao_render_push_constant.sample_weight_table[7] = 8.0f * sample_thickness[7]; // L-shaped + ssao.ssao_render_push_constant.sample_weight_table[8] = 4.0f * sample_thickness[8]; // Diagonal + ssao.ssao_render_push_constant.sample_weight_table[9] = 8.0f * sample_thickness[9]; // L-shaped + ssao.ssao_render_push_constant.sample_weight_table[10] = 8.0f * sample_thickness[10]; // L-shaped + ssao.ssao_render_push_constant.sample_weight_table[11] = 4.0f * sample_thickness[11]; // Diagonal + + // If we aren't using all of the samples, delete their weights before we normalize. + if (!p_use_full_samples) { + ssao.ssao_render_push_constant.sample_weight_table[0] = 0.0f; + ssao.ssao_render_push_constant.sample_weight_table[2] = 0.0f; + ssao.ssao_render_push_constant.sample_weight_table[5] = 0.0f; + ssao.ssao_render_push_constant.sample_weight_table[7] = 0.0f; + ssao.ssao_render_push_constant.sample_weight_table[9] = 0.0f; + } + + // Normalize the weights by dividing by the sum of all weights + float total_weight = 0.0f; + for (int i = 0; i < 12; ++i) { + total_weight += ssao.ssao_render_push_constant.sample_weight_table[i]; + } + + for (int i = 0; i < 12; ++i) { + ssao.ssao_render_push_constant.sample_weight_table[i] /= total_weight; + } + + ssao.ssao_render_push_constant.texel_size[0] = 1.0f / float(p_width); + ssao.ssao_render_push_constant.texel_size[1] = 1.0f / float(p_height); + ssao.ssao_render_push_constant.rejection_fadeoff = 1.0f / -p_rejection_radius; + ssao.ssao_render_push_constant.intensity = p_intensity; + ssao.ssao_render_push_constant.intensity = p_intensity; + + RID render_uniform_set = RID(); + bool uniform_set_needs_update = false; + + if (ssao.render_uniform_set_cache.has(p_depth_buffer)) { + render_uniform_set = ssao.render_uniform_set_cache[p_depth_buffer]; + if (!RD::get_singleton()->uniform_set_is_valid(render_uniform_set)) { + uniform_set_needs_update = true; + } + } else { + uniform_set_needs_update = true; + } - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + if (uniform_set_needs_update) { + Vector<RD::Uniform> uniforms; + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.ids.push_back(ssao.render_sampler); + u.ids.push_back(p_depth_buffer); + uniforms.push_back(u); - /* FIRST PASS */ - // Minify the depth buffer. + render_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.ssao_render_shader.version_get_shader(ssao.ssao_render_shader_version, 0), 0); + texture_to_compute_uniform_set_cache[p_depth_buffer] = render_uniform_set; + } - for (int i = 0; i < depth_mipmaps.size(); i++) { - if (i == 0) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_MINIFY_FIRST]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); - } else { - if (i == 1) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_MINIFY_MIPMAP]); - } + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, render_uniform_set, 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[i - 1]), 0); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[i]), 1); + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_destination), 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.minify_push_constant, sizeof(SSAOMinifyPushConstant)); - // shrink after set - ssao.minify_push_constant.source_size[0] = MAX(1, ssao.minify_push_constant.source_size[0] >> 1); - ssao.minify_push_constant.source_size[1] = MAX(1, ssao.minify_push_constant.source_size[1] >> 1); + int x_groups = (p_width + 7) / 8; + int y_groups = (p_height + 7) / 8; + int z_groups = p_depth; - int x_groups = (ssao.minify_push_constant.source_size[0] - 1) / 8 + 1; - int y_groups = (ssao.minify_push_constant.source_size[1] - 1) / 8 + 1; + if (z_groups == 1) { + x_groups = (p_width + 15) / 16; + y_groups = (p_height + 15) / 16; + } - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.ssao_render_push_constant, sizeof(SSAORenderPushConstant)); + + RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, z_groups); +} + +void RasterizerEffectsRD::_upsample_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_hi_res_depth, RID p_lo_res_depth, RID p_interleaved_ao, RID p_high_quality_ao, RID p_hi_res_ao, int p_low_width, int p_low_height, int p_high_width, int p_high_height, int p_screen_width, float p_noise_tolerance, float p_blur_tolerance, float p_upscale_tolerance) { + SSAOMode pipeline = SSAO_MAX; + if (p_hi_res_ao == RID()) { + pipeline = p_high_quality_ao == RID() ? SSAO_BLUR_UPSCALE : SSAO_BLUR_UPSCALE_MIN; + } else { + pipeline = p_high_quality_ao == RID() ? SSAO_BLUR_UPSCALE_BLEND : SSAO_BLUR_UPSCALE_MIN_BLEND; } - /* SECOND PASS */ - // Gather samples + RD::get_singleton()->compute_list_bind_compute_pipeline(p_compute_list, ssao.pipelines[pipeline]); + + float blur_tolerance = 1.0f - powf(10.0f, p_blur_tolerance) * float(p_screen_width) / float(p_low_width); + blur_tolerance *= blur_tolerance; + float upsample_tolerance = powf(10.0f, p_upscale_tolerance); + float noise_filter_weight = 1.0f / (powf(10.0f, p_noise_tolerance) + upsample_tolerance); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[(SSAO_GATHER_LOW + p_quality) + (p_half_size ? 4 : 0)]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao1), 1); - if (!p_half_size) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 2); + ssao.upsample_push_constant.inv_low_resolution[0] = 1.0 / float(p_low_width); + ssao.upsample_push_constant.inv_low_resolution[1] = 1.0 / float(p_low_height); + ssao.upsample_push_constant.inv_high_resolution[0] = 1.0 / float(p_high_width); + ssao.upsample_push_constant.inv_high_resolution[1] = 1.0 / float(p_high_height); + ssao.upsample_push_constant.noise_filter_strength = noise_filter_weight; + ssao.upsample_push_constant.step_size = float(p_screen_width) / float(p_low_width); + ssao.upsample_push_constant.blur_tolerance = blur_tolerance; + ssao.upsample_push_constant.upsample_tolerance = upsample_tolerance; + + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_lo_res_depth), 0); + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_hi_res_depth), 1); + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_interleaved_ao), 2); + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_destination), 3); + + if (p_high_quality_ao != RID()) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_high_quality_ao), 4); } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_normal_buffer), 3); - ssao.gather_push_constant.screen_size[0] = p_depth_buffer_size.x; - ssao.gather_push_constant.screen_size[1] = p_depth_buffer_size.y; - if (p_half_size) { - ssao.gather_push_constant.screen_size[0] >>= 1; - ssao.gather_push_constant.screen_size[1] >>= 1; + if (p_hi_res_ao != RID()) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_texture(p_hi_res_ao), 5); } - ssao.gather_push_constant.z_far = p_projection.get_z_far(); - ssao.gather_push_constant.z_near = p_projection.get_z_near(); - ssao.gather_push_constant.orthogonal = p_projection.is_orthogonal(); - ssao.gather_push_constant.proj_info[0] = -2.0f / (ssao.gather_push_constant.screen_size[0] * p_projection.matrix[0][0]); - ssao.gather_push_constant.proj_info[1] = -2.0f / (ssao.gather_push_constant.screen_size[1] * p_projection.matrix[1][1]); - ssao.gather_push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - ssao.gather_push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - //ssao.gather_push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - //ssao.gather_push_constant.proj_info[3] = -(1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; + int x_groups = ((p_high_width + 17) / 16); + int y_groups = ((p_high_height + 17) / 16); - ssao.gather_push_constant.radius = p_radius; + RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.upsample_push_constant, sizeof(SSAOUpsamplePushConstant)); + + RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(p_compute_list); +} - ssao.gather_push_constant.proj_scale = float(p_projection.get_pixels_per_meter(ssao.gather_push_constant.screen_size[0])); - ssao.gather_push_constant.bias = p_bias; - ssao.gather_push_constant.intensity_div_r6 = p_intensity / pow(p_radius, 6.0f); +// Implementation comes from Microsofts DirectX samples miniengine here https://github.com/microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/SSAO.cpp +void RasterizerEffectsRD::generate_ssao(RID p_depth_buffer, const Size2i &p_depth_buffer_size, const Vector<RID> &depth_mipmaps, RID p_linear_z, const Vector<RID> &p_tiled_depth_mipmaps, const Vector<RID> &p_ao_slices, const Vector<RID> &p_high_quality_ao_slices, const Vector<RID> &p_filtered_ao_slices, RID p_ao_full, const CameraMatrix &p_projection, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance, float p_rejection_radius, float p_intensity, int p_levels, RS::EnvironmentSSAOQuality p_quality, bool p_full_samples) { + ssao.downsample1_push_constant.orthogonal = p_projection.is_orthogonal(); + ssao.downsample1_push_constant.z_near = p_projection.get_z_near(); + ssao.downsample1_push_constant.z_far = p_projection.get_z_far(); + + const int buffer_width1 = (p_depth_buffer_size.x + 1) / 2; + const int buffer_width2 = (p_depth_buffer_size.x + 3) / 4; + const int buffer_width3 = (p_depth_buffer_size.x + 7) / 8; + const int buffer_width4 = (p_depth_buffer_size.x + 15) / 16; + const int buffer_width5 = (p_depth_buffer_size.x + 31) / 32; + const int buffer_width6 = (p_depth_buffer_size.x + 63) / 64; + const int buffer_height1 = (p_depth_buffer_size.y + 1) / 2; + const int buffer_height2 = (p_depth_buffer_size.y + 3) / 4; + const int buffer_height3 = (p_depth_buffer_size.y + 7) / 8; + const int buffer_height4 = (p_depth_buffer_size.y + 15) / 16; + const int buffer_height5 = (p_depth_buffer_size.y + 31) / 32; + const int buffer_height6 = (p_depth_buffer_size.y + 63) / 64; - ssao.gather_push_constant.pixel_size[0] = 1.0 / p_depth_buffer_size.x; - ssao.gather_push_constant.pixel_size[1] = 1.0 / p_depth_buffer_size.y; + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); + /* FIRST PASS */ + // Downsample the depth buffer. + { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_DOWNSAMPLE1]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_linear_z), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[0]), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[0]), 3); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[1]), 4); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[1]), 5); - int x_groups = (ssao.gather_push_constant.screen_size[0] - 1) / 8 + 1; - int y_groups = (ssao.gather_push_constant.screen_size[1] - 1) / 8 + 1; + int x_groups = (buffer_width4 * 8 + 7) / 8; + int y_groups = (buffer_height4 * 8 + 7) / 8; - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample1_push_constant, sizeof(SSAODownsample1PushConstant)); - /* THIRD PASS */ - // Blur horizontal - - ssao.blur_push_constant.edge_sharpness = p_edge_sharpness; - ssao.blur_push_constant.filter_scale = p_blur; - ssao.blur_push_constant.screen_size[0] = ssao.gather_push_constant.screen_size[0]; - ssao.blur_push_constant.screen_size[1] = ssao.gather_push_constant.screen_size[1]; - ssao.blur_push_constant.z_far = p_projection.get_z_far(); - ssao.blur_push_constant.z_near = p_projection.get_z_near(); - ssao.blur_push_constant.orthogonal = p_projection.is_orthogonal(); - ssao.blur_push_constant.axis[0] = 1; - ssao.blur_push_constant.axis[1] = 0; - - if (p_blur != RS::ENV_SSAO_BLUR_DISABLED) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[p_half_size ? SSAO_BLUR_PASS_HALF : SSAO_BLUR_PASS]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao1), 0); - if (p_half_size) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 1); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 1); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao2), 3); + RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + if (p_levels > 2) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_DOWNSAMPLE2]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[1]), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[2]), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[2]), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(depth_mipmaps[3]), 3); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_tiled_depth_mipmaps[3]), 4); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); + int x_groups = (buffer_width6 * 8 + 7) / 8; + int y_groups = (buffer_height6 * 8 + 7) / 8; RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); + } - /* THIRD PASS */ - // Blur vertical + /* SECOND PASS */ + // compute AO for each level used - ssao.blur_push_constant.axis[0] = 0; - ssao.blur_push_constant.axis[1] = 1; + { + const float fov_tangent = 0.5 / p_projection.matrix[0][0]; + + if (p_levels > 3) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); + _compute_ssao(compute_list, p_filtered_ao_slices[3], p_tiled_depth_mipmaps[3], fov_tangent, buffer_width6, buffer_height6, 16, p_rejection_radius, p_intensity, p_full_samples); + if (p_quality >= RS::ENV_SSAO_QUALITY_LOW) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); + _compute_ssao(compute_list, p_high_quality_ao_slices[3], depth_mipmaps[3], fov_tangent, buffer_width4, buffer_height4, 1, p_rejection_radius, p_intensity, p_full_samples); + } + } + if (p_levels > 2) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); + _compute_ssao(compute_list, p_filtered_ao_slices[2], p_tiled_depth_mipmaps[2], fov_tangent, buffer_width5, buffer_height5, 16, p_rejection_radius, p_intensity, p_full_samples); + if (p_quality >= RS::ENV_SSAO_QUALITY_MEDIUM) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); + _compute_ssao(compute_list, p_high_quality_ao_slices[2], depth_mipmaps[2], fov_tangent, buffer_width3, buffer_height3, 1, p_rejection_radius, p_intensity, p_full_samples); + } + } + if (p_levels > 1) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); + _compute_ssao(compute_list, p_filtered_ao_slices[1], p_tiled_depth_mipmaps[1], fov_tangent, buffer_width4, buffer_height4, 16, p_rejection_radius, p_intensity, p_full_samples); + if (p_quality >= RS::ENV_SSAO_QUALITY_HIGH) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); + _compute_ssao(compute_list, p_high_quality_ao_slices[1], depth_mipmaps[1], fov_tangent, buffer_width2, buffer_height2, 1, p_rejection_radius, p_intensity, p_full_samples); + } + } + { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER1]); + _compute_ssao(compute_list, p_filtered_ao_slices[0], p_tiled_depth_mipmaps[0], fov_tangent, buffer_width3, buffer_height3, 16, p_rejection_radius, p_intensity, p_full_samples); + if (p_quality >= RS::ENV_SSAO_QUALITY_ULTRA) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_RENDER2]); + _compute_ssao(compute_list, p_high_quality_ao_slices[0], depth_mipmaps[0], fov_tangent, buffer_width1, buffer_height1, 1, p_rejection_radius, p_intensity, p_full_samples); + } + } + } + RD::get_singleton()->compute_list_add_barrier(compute_list); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao2), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao1), 3); + /* THIRD PASS */ + // blend and upsample levels for final result - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); + { + RID NextSRV = p_filtered_ao_slices[3]; - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); - } - if (p_half_size) { //must upscale + if (p_levels > 3) { + _upsample_ssao(compute_list, p_ao_slices[2], depth_mipmaps[2], depth_mipmaps[3], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_LOW ? p_high_quality_ao_slices[3] : RID(), + p_filtered_ao_slices[2], buffer_width4, buffer_height4, buffer_width3, buffer_height3, p_depth_buffer_size.x, + p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); - /* FOURTH PASS */ - // upscale if half size - //back to full size - ssao.blur_push_constant.screen_size[0] = p_depth_buffer_size.x; - ssao.blur_push_constant.screen_size[1] = p_depth_buffer_size.y; + NextSRV = p_ao_slices[2]; + } else { + NextSRV = p_filtered_ao_slices[2]; + } - RD::get_singleton()->compute_list_add_barrier(compute_list); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_BLUR_UPSCALE]); + if (p_levels > 2) { + _upsample_ssao(compute_list, p_ao_slices[1], depth_mipmaps[1], depth_mipmaps[2], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_MEDIUM ? p_high_quality_ao_slices[2] : RID(), + p_filtered_ao_slices[1], buffer_width3, buffer_height3, buffer_width2, buffer_height2, p_depth_buffer_size.x, + p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao1), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_upscale_buffer), 3); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_mipmaps_texture), 2); + NextSRV = p_ao_slices[1]; + } else { + NextSRV = p_filtered_ao_slices[1]; + } - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); //not used but set anyway + if (p_levels > 1) { + _upsample_ssao(compute_list, p_ao_slices[0], depth_mipmaps[0], depth_mipmaps[1], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_HIGH ? p_high_quality_ao_slices[1] : RID(), + p_filtered_ao_slices[0], buffer_width2, buffer_height2, buffer_width1, buffer_height1, p_depth_buffer_size.x, + p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); - x_groups = (p_depth_buffer_size.x - 1) / 8 + 1; - y_groups = (p_depth_buffer_size.y - 1) / 8 + 1; + NextSRV = p_ao_slices[0]; + } else { + NextSRV = p_filtered_ao_slices[0]; + } - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + _upsample_ssao(compute_list, p_ao_full, p_linear_z, depth_mipmaps[0], NextSRV, p_quality >= RS::ENV_SSAO_QUALITY_ULTRA ? p_high_quality_ao_slices[0] : RID(), + RID(), buffer_width1, buffer_height1, p_depth_buffer_size.x, p_depth_buffer_size.y, p_depth_buffer_size.x, + p_noise_tolerance, p_blur_tolerance, p_upsample_tolerance); } RD::get_singleton()->compute_list_end(); @@ -1456,54 +1632,67 @@ RasterizerEffectsRD::RasterizerEffectsRD() { } { + RD::SamplerState ssao_sampler; + ssao_sampler.mag_filter = RD::SAMPLER_FILTER_LINEAR; + ssao_sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; + ssao_sampler.max_lod = 0; + ssao_sampler.border_color = RD::SAMPLER_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + ssao_sampler.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER; + ssao_sampler.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER; + ssao_sampler.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER; + + ssao.render_sampler = RD::get_singleton()->sampler_create(ssao_sampler); // Initialize ssao uint32_t pipeline = 0; { Vector<String> ssao_modes; - ssao_modes.push_back("\n#define MINIFY_START\n"); ssao_modes.push_back("\n"); - ssao.minify_shader.initialize(ssao_modes); + ssao.downsample1_shader.initialize(ssao_modes); - ssao.minify_shader_version = ssao.minify_shader.version_create(); + ssao.downsample1_shader_version = ssao.downsample1_shader.version_create(); - for (int i = 0; i <= SSAO_MINIFY_MIPMAP; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.minify_shader.version_get_shader(ssao.minify_shader_version, i)); - pipeline++; - } + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.downsample1_shader.version_get_shader(ssao.downsample1_shader_version, 0)); + pipeline++; + } + { + Vector<String> ssao_modes; + ssao_modes.push_back("\n"); + + ssao.downsample2_shader.initialize(ssao_modes); + + ssao.downsample2_shader_version = ssao.downsample2_shader.version_create(); + + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.downsample2_shader.version_get_shader(ssao.downsample2_shader_version, 0)); + pipeline++; } { Vector<String> ssao_modes; - ssao_modes.push_back("\n#define SSAO_QUALITY_LOW\n"); + ssao_modes.push_back("\n#define INTERLEAVE_RESULT\n"); ssao_modes.push_back("\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_HIGH\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_ULTRA\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_LOW\n#define USE_HALF_SIZE\n"); - ssao_modes.push_back("\n#define USE_HALF_SIZE\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_HIGH\n#define USE_HALF_SIZE\n"); - ssao_modes.push_back("\n#define SSAO_QUALITY_ULTRA\n#define USE_HALF_SIZE\n"); - ssao.gather_shader.initialize(ssao_modes); + ssao.ssao_render_shader.initialize(ssao_modes); - ssao.gather_shader_version = ssao.gather_shader.version_create(); + ssao.ssao_render_shader_version = ssao.ssao_render_shader.version_create(); - for (int i = SSAO_GATHER_LOW; i <= SSAO_GATHER_ULTRA_HALF; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.gather_shader.version_get_shader(ssao.gather_shader_version, i - SSAO_GATHER_LOW)); + for (int i = SSAO_RENDER1; i <= SSAO_RENDER2; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.ssao_render_shader.version_get_shader(ssao.ssao_render_shader_version, i - SSAO_RENDER1)); pipeline++; } } { Vector<String> ssao_modes; - ssao_modes.push_back("\n#define MODE_FULL_SIZE\n"); ssao_modes.push_back("\n"); - ssao_modes.push_back("\n#define MODE_UPSCALE\n"); + ssao_modes.push_back("\n#define COMBINE_LOWER_RESOLUTIONS\n"); + ssao_modes.push_back("\n#define BLEND_WITH_HIGHER_RESOLUTION\n"); + ssao_modes.push_back("\n#define COMBINE_LOWER_RESOLUTIONS\n#define BLEND_WITH_HIGHER_RESOLUTION\n"); - ssao.blur_shader.initialize(ssao_modes); + ssao.upsample_shader.initialize(ssao_modes); - ssao.blur_shader_version = ssao.blur_shader.version_create(); + ssao.upsample_shader_version = ssao.upsample_shader.version_create(); - for (int i = SSAO_BLUR_PASS; i <= SSAO_BLUR_UPSCALE; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.blur_shader.version_get_shader(ssao.blur_shader_version, i - SSAO_BLUR_PASS)); + for (int i = SSAO_BLUR_UPSCALE; i <= SSAO_BLUR_UPSCALE_MIN_BLEND; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.upsample_shader.version_get_shader(ssao.upsample_shader_version, i - SSAO_BLUR_UPSCALE)); pipeline++; } @@ -1745,6 +1934,7 @@ RasterizerEffectsRD::~RasterizerEffectsRD() { RD::get_singleton()->free(default_sampler); RD::get_singleton()->free(default_mipmap_sampler); + RD::get_singleton()->free(ssao.render_sampler); RD::get_singleton()->free(index_buffer); //array gets freed as dependency RD::get_singleton()->free(filter.coefficient_buffer); @@ -1760,9 +1950,10 @@ RasterizerEffectsRD::~RasterizerEffectsRD() { roughness_limiter.shader.version_free(roughness_limiter.shader_version); sort.shader.version_free(sort.shader_version); specular_merge.shader.version_free(specular_merge.shader_version); - ssao.blur_shader.version_free(ssao.blur_shader_version); - ssao.gather_shader.version_free(ssao.gather_shader_version); - ssao.minify_shader.version_free(ssao.minify_shader_version); + ssao.upsample_shader.version_free(ssao.upsample_shader_version); + ssao.ssao_render_shader.version_free(ssao.ssao_render_shader_version); + ssao.downsample1_shader.version_free(ssao.downsample1_shader_version); + ssao.downsample2_shader.version_free(ssao.downsample2_shader_version); ssr.shader.version_free(ssr.shader_version); ssr_filter.shader.version_free(ssr_filter.shader_version); ssr_scale.shader.version_free(ssr_scale.shader_version); diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h index e434bbc372..2b3d2f124b 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h @@ -49,9 +49,10 @@ #include "servers/rendering/rasterizer_rd/shaders/shadow_reduce.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/sort.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/specular_merge.glsl.gen.h" -#include "servers/rendering/rasterizer_rd/shaders/ssao.glsl.gen.h" -#include "servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl.gen.h" -#include "servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/ssao_render.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/subsurface_scattering.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/tonemap.glsl.gen.h" @@ -279,72 +280,60 @@ class RasterizerEffectsRD { } bokeh; enum SSAOMode { - SSAO_MINIFY_FIRST, - SSAO_MINIFY_MIPMAP, - SSAO_GATHER_LOW, - SSAO_GATHER_MEDIUM, - SSAO_GATHER_HIGH, - SSAO_GATHER_ULTRA, - SSAO_GATHER_LOW_HALF, - SSAO_GATHER_MEDIUM_HALF, - SSAO_GATHER_HIGH_HALF, - SSAO_GATHER_ULTRA_HALF, - SSAO_BLUR_PASS, - SSAO_BLUR_PASS_HALF, + SSAO_DOWNSAMPLE1, + SSAO_DOWNSAMPLE2, + SSAO_RENDER1, + SSAO_RENDER2, SSAO_BLUR_UPSCALE, + SSAO_BLUR_UPSCALE_MIN, + SSAO_BLUR_UPSCALE_BLEND, + SSAO_BLUR_UPSCALE_MIN_BLEND, SSAO_MAX }; - struct SSAOMinifyPushConstant { - float pixel_size[2]; + struct SSAODownsample1PushConstant { float z_far; float z_near; - int32_t source_size[2]; uint32_t orthogonal; uint32_t pad; }; - struct SSAOGatherPushConstant { - int32_t screen_size[2]; - float z_far; - float z_near; - - uint32_t orthogonal; - float intensity_div_r6; - float radius; - float bias; - - float proj_info[4]; - float pixel_size[2]; - float proj_scale; - uint32_t pad; + struct SSAORenderPushConstant { + float inv_thickness_table[12]; + float sample_weight_table[12]; + float texel_size[2]; + float rejection_fadeoff; + float intensity; }; - struct SSAOBlurPushConstant { - float edge_sharpness; - int32_t filter_scale; - float z_far; - float z_near; - uint32_t orthogonal; - uint32_t pad[3]; - int32_t axis[2]; - int32_t screen_size[2]; + struct SSAOUpsamplePushConstant { + float inv_low_resolution[2]; + float inv_high_resolution[2]; + float noise_filter_strength; + float step_size; + float blur_tolerance; + float upsample_tolerance; }; struct SSAO { - SSAOMinifyPushConstant minify_push_constant; - SsaoMinifyShaderRD minify_shader; - RID minify_shader_version; + SSAODownsample1PushConstant downsample1_push_constant; + SsaoDownsample1ShaderRD downsample1_shader; + RID downsample1_shader_version; + + SsaoDownsample2ShaderRD downsample2_shader; + RID downsample2_shader_version; - SSAOGatherPushConstant gather_push_constant; - SsaoShaderRD gather_shader; - RID gather_shader_version; + SSAORenderPushConstant ssao_render_push_constant; + SsaoRenderShaderRD ssao_render_shader; + RID ssao_render_shader_version; - SSAOBlurPushConstant blur_push_constant; - SsaoBlurShaderRD blur_shader; - RID blur_shader_version; + SSAOUpsamplePushConstant upsample_push_constant; + SsaoUpsampleShaderRD upsample_shader; + RID upsample_shader_version; RID pipelines[SSAO_MAX]; + RID render_sampler; + Map<RID, RID> render_uniform_set_cache; } ssao; struct RoughnessLimiterPushConstant { @@ -654,7 +643,9 @@ public: void tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings); - void generate_ssao(RID p_depth_buffer, RID p_normal_buffer, const Size2i &p_depth_buffer_size, RID p_depth_mipmaps_texture, const Vector<RID> &depth_mipmaps, RID p_ao1, bool p_half_size, RID p_ao2, RID p_upscale_buffer, float p_intensity, float p_radius, float p_bias, const CameraMatrix &p_projection, RS::EnvironmentSSAOQuality p_quality, RS::EnvironmentSSAOBlur p_blur, float p_edge_sharpness); + _FORCE_INLINE_ void _compute_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_depth_buffer, const float p_tan_half_fov_h, int p_width, int p_height, int p_depth, float p_rejection_radius, float p_intensity, bool p_full_samples); + _FORCE_INLINE_ void _upsample_ssao(RD::ComputeListID p_compute_list, RID p_destination, RID p_hi_res_depth, RID p_lo_res_depth, RID p_interleaved_ao, RID p_high_quality_ao, RID p_hi_res_ao, int p_low_width, int p_low_height, int p_high_width, int p_high_high, int p_screen_width, float p_noise_tolerance, float p_blur_tolerance, float p_upscale_tolerance); + void generate_ssao(RID p_depth_buffer, const Size2i &p_depth_buffer_size, const Vector<RID> &depth_mipmaps, RID p_linear_z, const Vector<RID> &p_tiled_depth_mipmaps, const Vector<RID> &p_ao_slices, const Vector<RID> &p_high_quality_ao_slices, const Vector<RID> &p_filtered_ao_slices, RID p_ao_full, const CameraMatrix &p_projection, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance, float p_rejection_radius, float p_intensity, int p_levels, RS::EnvironmentSSAOQuality p_quality, bool p_full_samples); void roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve); void cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size); diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp index ce823b7198..3afb82d168 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp @@ -3099,22 +3099,25 @@ RS::EnvironmentSSRRoughnessQuality RasterizerSceneRD::environment_get_ssr_roughn return ssr_roughness_quality; } -void RasterizerSceneRD::environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness) { +void RasterizerSceneRD::environment_set_ssao(RID p_env, bool p_enable, float p_rejection_radius, float p_intensity, int p_levels, float p_light_affect, float p_ao_channel_affect) { Environment *env = environment_owner.getornull(p_env); ERR_FAIL_COND(!env); env->ssao_enabled = p_enable; - env->ssao_radius = p_radius; + + env->ssao_rejection_radius = p_rejection_radius; env->ssao_intensity = p_intensity; - env->ssao_bias = p_bias; + env->ssao_levels = p_levels; env->ssao_direct_light_affect = p_light_affect; env->ssao_ao_channel_affect = p_ao_channel_affect; - env->ssao_blur = p_blur; } -void RasterizerSceneRD::environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size) { +void RasterizerSceneRD::environment_set_ssao_settings(RS::EnvironmentSSAOQuality p_quality, bool p_full_samples, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance) { ssao_quality = p_quality; - ssao_half_size = p_half_size; + ssao_full_samples = p_full_samples; + ssao_noise_tolerance = Math::lerp(-8.0f, 0.0f, p_noise_tolerance); + ssao_blur_tolerance = Math::lerp(-8.0f, -1.0f, p_blur_tolerance); + ssao_upsample_tolerance = Math::lerp(-12.0f, -1.0f, p_upsample_tolerance); } bool RasterizerSceneRD::environment_is_ssao_enabled(RID p_env) const { @@ -5034,21 +5037,33 @@ void RasterizerSceneRD::_free_render_buffer_data(RenderBuffers *rb) { rb->luminance.current = RID(); } - if (rb->ssao.ao[0].is_valid()) { - RD::get_singleton()->free(rb->ssao.depth); - RD::get_singleton()->free(rb->ssao.ao[0]); - if (rb->ssao.ao[1].is_valid()) { - RD::get_singleton()->free(rb->ssao.ao[1]); + if (rb->ssao.ao_full.is_valid()) { + RD::get_singleton()->free(rb->ssao.ao_full); + RD::get_singleton()->free(rb->ssao.linear_depth); + rb->ssao.ao_full = RID(); + rb->ssao.linear_depth = RID(); + + for (int i = 0; i < rb->ssao.depth_slices.size(); i++) { + RD::get_singleton()->free(rb->ssao.depth_slices[i]); + } + for (int i = 0; i < rb->ssao.depth_tiled_slices.size(); i++) { + RD::get_singleton()->free(rb->ssao.depth_tiled_slices[i]); + } + for (int i = 0; i < rb->ssao.ao_slices.size(); i++) { + RD::get_singleton()->free(rb->ssao.ao_slices[i]); } - if (rb->ssao.ao_full.is_valid()) { - RD::get_singleton()->free(rb->ssao.ao_full); + for (int i = 0; i < rb->ssao.filtered_ao_slices.size(); i++) { + RD::get_singleton()->free(rb->ssao.filtered_ao_slices[i]); + } + for (int i = 0; i < rb->ssao.high_quality_ao_slices.size(); i++) { + RD::get_singleton()->free(rb->ssao.high_quality_ao_slices[i]); } - rb->ssao.depth = RID(); - rb->ssao.ao[0] = RID(); - rb->ssao.ao[1] = RID(); - rb->ssao.ao_full = RID(); rb->ssao.depth_slices.clear(); + rb->ssao.depth_tiled_slices.clear(); + rb->ssao.ao_slices.clear(); + rb->ssao.filtered_ao_slices.clear(); + rb->ssao.high_quality_ao_slices.clear(); } if (rb->ssr.blur_radius[0].is_valid()) { @@ -5147,64 +5162,117 @@ void RasterizerSceneRD::_process_ssao(RID p_render_buffers, RID p_environment, R RENDER_TIMESTAMP("Process SSAO"); - if (rb->ssao.ao[0].is_valid() && rb->ssao.ao_full.is_valid() != ssao_half_size) { - RD::get_singleton()->free(rb->ssao.depth); - RD::get_singleton()->free(rb->ssao.ao[0]); - if (rb->ssao.ao[1].is_valid()) { - RD::get_singleton()->free(rb->ssao.ao[1]); + int size_x = rb->width; + int size_y = rb->height; + const int buffer_widths[6] = { + (size_x + 1) / 2, + (size_x + 3) / 4, + (size_x + 7) / 8, + (size_x + 15) / 16, + (size_x + 31) / 32, + (size_x + 63) / 64 + }; + const int buffer_heights[6] = { + (size_y + 1) / 2, + (size_y + 3) / 4, + (size_y + 7) / 8, + (size_y + 15) / 16, + (size_y + 31) / 32, + (size_y + 63) / 64 + }; + + if (!rb->ssao.ao_full.is_valid()) { + //allocate SSAO buffers + + { + for (uint32_t i = 0; i < 4; i++) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + tf.width = buffer_widths[i]; + tf.height = buffer_heights[i]; + tf.mipmaps = 1; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.depth_slices.push_back(slice); + } } - if (rb->ssao.ao_full.is_valid()) { - RD::get_singleton()->free(rb->ssao.ao_full); + + { + for (uint32_t i = 2; i < 6; i++) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16_SFLOAT; + tf.type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.array_layers = 16; + tf.width = buffer_widths[i]; + tf.height = buffer_heights[i]; + tf.mipmaps = 1; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.depth_tiled_slices.push_back(slice); + } } - rb->ssao.depth = RID(); - rb->ssao.ao[0] = RID(); - rb->ssao.ao[1] = RID(); - rb->ssao.ao_full = RID(); - rb->ssao.depth_slices.clear(); - } + { + for (uint32_t i = 0; i < 3; i++) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = buffer_widths[i]; + tf.height = buffer_heights[i]; + tf.mipmaps = 1; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.ao_slices.push_back(slice); + } + } - if (!rb->ssao.ao[0].is_valid()) { - //allocate depth slices + { + for (uint32_t i = 0; i < 4; i++) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = buffer_widths[i]; + tf.height = buffer_heights[i]; + tf.mipmaps = 1; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.high_quality_ao_slices.push_back(slice); + } + } { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; - tf.width = rb->width / 2; - tf.height = rb->height / 2; - tf.mipmaps = Image::get_image_required_mipmaps(tf.width, tf.height, Image::FORMAT_RF) + 1; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ssao.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); - for (uint32_t i = 0; i < tf.mipmaps; i++) { - RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ssao.depth, 0, i); - rb->ssao.depth_slices.push_back(slice); + for (uint32_t i = 0; i < 4; i++) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = buffer_widths[i]; + tf.height = buffer_heights[i]; + tf.mipmaps = 1; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + RID slice = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.filtered_ao_slices.push_back(slice); } } { RD::TextureFormat tf; tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = ssao_half_size ? rb->width / 2 : rb->width; - tf.height = ssao_half_size ? rb->height / 2 : rb->height; + tf.width = size_x; + tf.height = size_y; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ssao.ao[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssao.ao[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.ao_full = RD::get_singleton()->texture_create(tf, RD::TextureView()); } - if (ssao_half_size) { - //upsample texture + { RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = rb->width; - tf.height = rb->height; + tf.format = RD::DATA_FORMAT_R16_UNORM; + tf.width = size_x; + tf.height = size_y; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ssao.ao_full = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ssao.linear_depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); } _render_buffers_uniform_set_changed(p_render_buffers); } - storage->get_effects()->generate_ssao(rb->depth_texture, p_normal_buffer, Size2i(rb->width, rb->height), rb->ssao.depth, rb->ssao.depth_slices, rb->ssao.ao[0], rb->ssao.ao_full.is_valid(), rb->ssao.ao[1], rb->ssao.ao_full, env->ssao_intensity, env->ssao_radius, env->ssao_bias, p_projection, ssao_quality, env->ssao_blur, env->ssao_blur_edge_sharpness); + storage->get_effects()->generate_ssao(rb->depth_texture, Size2i(size_x, size_y), rb->ssao.depth_slices, rb->ssao.linear_depth, rb->ssao.depth_tiled_slices, rb->ssao.ao_slices, rb->ssao.high_quality_ao_slices, rb->ssao.filtered_ao_slices, rb->ssao.ao_full, p_projection, ssao_noise_tolerance, ssao_blur_tolerance, ssao_upsample_tolerance, env->ssao_rejection_radius, env->ssao_intensity, env->ssao_levels, ssao_quality, ssao_full_samples); } void RasterizerSceneRD::_render_buffers_post_process_and_tonemap(RID p_render_buffers, RID p_environment, RID p_camera_effects, const CameraMatrix &p_projection) { @@ -5371,9 +5439,9 @@ void RasterizerSceneRD::_render_buffers_debug_draw(RID p_render_buffers, RID p_s } } - if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SSAO && rb->ssao.ao[0].is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SSAO && rb->ssao.ao_full.is_valid()) { Size2 rtsize = storage->render_target_get_size(rb->render_target); - RID ao_buf = rb->ssao.ao_full.is_valid() ? rb->ssao.ao_full : rb->ssao.ao[0]; + RID ao_buf = rb->ssao.ao_full; effects->copy_to_fb_rect(ao_buf, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, true); } @@ -5549,7 +5617,7 @@ RID RasterizerSceneRD::render_buffers_get_ao_texture(RID p_render_buffers) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND_V(!rb, RID()); - return rb->ssao.ao_full.is_valid() ? rb->ssao.ao_full : rb->ssao.ao[0]; + return rb->ssao.ao_full; } RID RasterizerSceneRD::render_buffers_get_gi_probe_buffer(RID p_render_buffers) { @@ -8334,7 +8402,7 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) { camera_effects_set_dof_blur_bokeh_shape(RS::DOFBokehShape(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_shape")))); camera_effects_set_dof_blur_quality(RS::DOFBlurQuality(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_quality"))), GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_use_jitter")); - environment_set_ssao_quality(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/quality/ssao/quality"))), GLOBAL_GET("rendering/quality/ssao/half_size")); + environment_set_ssao_settings(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/ssao/quality"))), GLOBAL_GET("rendering/ssao/full_samples"), GLOBAL_GET("rendering/ssao/noise_tolerance"), GLOBAL_GET("rendering/ssao/blur_tolerance"), GLOBAL_GET("rendering/ssao/upsample_tolerance")); screen_space_roughness_limiter = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_enabled"); screen_space_roughness_limiter_amount = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_amount"); screen_space_roughness_limiter_limit = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_limit"); diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h index 50647d54bf..c24f62d9de 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h @@ -736,13 +736,11 @@ private: /// SSAO bool ssao_enabled = false; - float ssao_radius = 1; - float ssao_intensity = 1; - float ssao_bias = 0.01; + float ssao_rejection_radius = 2.5; + float ssao_intensity = 1.0; + int ssao_levels = 3; float ssao_direct_light_affect = 0.0; float ssao_ao_channel_affect = 0.0; - float ssao_blur_edge_sharpness = 4.0; - RS::EnvironmentSSAOBlur ssao_blur = RS::ENV_SSAO_BLUR_3x3; /// SSR /// @@ -766,7 +764,11 @@ private: }; RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; - bool ssao_half_size = false; + bool ssao_full_samples = false; + float ssao_noise_tolerance = -3.0; + float ssao_blur_tolerance = -5.0; + float ssao_upsample_tolerance = -7.0; + bool glow_bicubic_upscale = false; bool glow_high_quality = false; RS::EnvironmentSSRRoughnessQuality ssr_roughness_quality = RS::ENV_SSR_ROUGNESS_QUALITY_LOW; @@ -848,10 +850,13 @@ private: } luminance; struct SSAO { - RID depth; Vector<RID> depth_slices; - RID ao[2]; - RID ao_full; //when using half-size + Vector<RID> depth_tiled_slices; + Vector<RID> filtered_ao_slices; + Vector<RID> ao_slices; + Vector<RID> high_quality_ao_slices; + RID linear_depth; + RID ao_full; } ssao; struct SSR { @@ -1554,8 +1559,8 @@ public: virtual void environment_set_volumetric_fog_positional_shadow_shrink_size(int p_shrink_size); void environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance); - void environment_set_ssao(RID p_env, bool p_enable, float p_radius, float p_intensity, float p_bias, float p_light_affect, float p_ao_channel_affect, RS::EnvironmentSSAOBlur p_blur, float p_bilateral_sharpness); - void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size); + void environment_set_ssao(RID p_env, bool p_enable, float p_rejection_radius, float p_intensity, int p_levels, float p_light_affect, float p_ao_channel_affect); + void environment_set_ssao_settings(RS::EnvironmentSSAOQuality p_quality, bool p_full_samples, float p_noise_tolerance, float p_blur_tolerance, float p_upsample_tolerance); bool environment_is_ssao_enabled(RID p_env) const; float environment_get_ssao_ao_affect(RID p_env) const; float environment_get_ssao_light_affect(RID p_env) const; diff --git a/servers/rendering/rasterizer_rd/shaders/SCsub b/servers/rendering/rasterizer_rd/shaders/SCsub index 9d531d63ad..f9b8591307 100644 --- a/servers/rendering/rasterizer_rd/shaders/SCsub +++ b/servers/rendering/rasterizer_rd/shaders/SCsub @@ -19,9 +19,10 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("giprobe_sdf.glsl") env.RD_GLSL("luminance_reduce.glsl") env.RD_GLSL("bokeh_dof.glsl") - env.RD_GLSL("ssao.glsl") - env.RD_GLSL("ssao_minify.glsl") - env.RD_GLSL("ssao_blur.glsl") + env.RD_GLSL("ssao_render.glsl") + env.RD_GLSL("ssao_downsample1.glsl") + env.RD_GLSL("ssao_downsample2.glsl") + env.RD_GLSL("ssao_upsample.glsl") env.RD_GLSL("roughness_limiter.glsl") env.RD_GLSL("screen_space_reflection.glsl") env.RD_GLSL("screen_space_reflection_filter.glsl") diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl index ec199c0d0e..a16b14e551 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl @@ -2702,7 +2702,7 @@ FRAGMENT_SHADER_CODE #if defined(AO_USED) if (scene_data.ssao_enabled && scene_data.ssao_ao_affect > 0.0) { - float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; + float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), gl_FragCoord.xy * scene_data.screen_pixel_size).r; ao = mix(ao, min(ao, ssao), scene_data.ssao_ao_affect); ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); } @@ -2714,7 +2714,7 @@ FRAGMENT_SHADER_CODE #else if (scene_data.ssao_enabled) { - float ao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; + float ao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), gl_FragCoord.xy * scene_data.screen_pixel_size).r; ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao); float ao_light_affect = mix(1.0, ao, scene_data.ssao_light_affect); specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect); diff --git a/servers/rendering/rasterizer_rd/shaders/ssao.glsl b/servers/rendering/rasterizer_rd/shaders/ssao.glsl deleted file mode 100644 index 346338181a..0000000000 --- a/servers/rendering/rasterizer_rd/shaders/ssao.glsl +++ /dev/null @@ -1,249 +0,0 @@ -#[compute] - -#version 450 - -VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#define TWO_PI 6.283185307179586476925286766559 - -#ifdef SSAO_QUALITY_HIGH -#define NUM_SAMPLES (20) -#endif - -#ifdef SSAO_QUALITY_ULTRA -#define NUM_SAMPLES (48) -#endif - -#ifdef SSAO_QUALITY_LOW -#define NUM_SAMPLES (8) -#endif - -#if !defined(SSAO_QUALITY_LOW) && !defined(SSAO_QUALITY_HIGH) && !defined(SSAO_QUALITY_ULTRA) -#define NUM_SAMPLES (12) -#endif - -// If using depth mip levels, the log of the maximum pixel offset before we need to switch to a lower -// miplevel to maintain reasonable spatial locality in the cache -// If this number is too small (< 3), too many taps will land in the same pixel, and we'll get bad variance that manifests as flashing. -// If it is too high (> 5), we'll get bad performance because we're not using the MIP levels effectively -#define LOG_MAX_OFFSET (3) - -// This must be less than or equal to the MAX_MIP_LEVEL defined in SSAO.cpp -#define MAX_MIP_LEVEL (4) - -// This is the number of turns around the circle that the spiral pattern makes. This should be prime to prevent -// taps from lining up. This particular choice was tuned for NUM_SAMPLES == 9 - -const int ROTATIONS[] = int[]( - 1, 1, 2, 3, 2, 5, 2, 3, 2, - 3, 3, 5, 5, 3, 4, 7, 5, 5, 7, - 9, 8, 5, 5, 7, 7, 7, 8, 5, 8, - 11, 12, 7, 10, 13, 8, 11, 8, 7, 14, - 11, 11, 13, 12, 13, 19, 17, 13, 11, 18, - 19, 11, 11, 14, 17, 21, 15, 16, 17, 18, - 13, 17, 11, 17, 19, 18, 25, 18, 19, 19, - 29, 21, 19, 27, 31, 29, 21, 18, 17, 29, - 31, 31, 23, 18, 25, 26, 25, 23, 19, 34, - 19, 27, 21, 25, 39, 29, 17, 21, 27); - -//#define NUM_SPIRAL_TURNS (7) -const int NUM_SPIRAL_TURNS = ROTATIONS[NUM_SAMPLES - 1]; - -layout(set = 0, binding = 0) uniform sampler2D source_depth_mipmaps; -layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -#ifndef USE_HALF_SIZE -layout(set = 2, binding = 0) uniform sampler2D source_depth; -#endif - -layout(set = 3, binding = 0) uniform sampler2D source_normal; - -layout(push_constant, binding = 1, std430) uniform Params { - ivec2 screen_size; - float z_far; - float z_near; - - bool orthogonal; - float intensity_div_r6; - float radius; - float bias; - - vec4 proj_info; - vec2 pixel_size; - float proj_scale; - uint pad; -} -params; - -vec3 reconstructCSPosition(vec2 S, float z) { - if (params.orthogonal) { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); - } else { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); - } -} - -vec3 getPosition(ivec2 ssP) { - vec3 P; -#ifdef USE_HALF_SIZE - P.z = texelFetch(source_depth_mipmaps, ssP, 0).r; - P.z = -P.z; -#else - P.z = texelFetch(source_depth, ssP, 0).r; - - P.z = P.z * 2.0 - 1.0; - if (params.orthogonal) { - P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near)); - } - P.z = -P.z; -#endif - // Offset to pixel center - P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z); - return P; -} - -/** Returns a unit vector and a screen-space radius for the tap on a unit disk (the caller should scale by the actual disk radius) */ -vec2 tapLocation(int sampleNumber, float spinAngle, out float ssR) { - // Radius relative to ssR - float alpha = (float(sampleNumber) + 0.5) * (1.0 / float(NUM_SAMPLES)); - float angle = alpha * (float(NUM_SPIRAL_TURNS) * 6.28) + spinAngle; - - ssR = alpha; - return vec2(cos(angle), sin(angle)); -} - -/** Read the camera-space position of the point at screen-space pixel ssP + unitOffset * ssR. Assumes length(unitOffset) == 1 */ -vec3 getOffsetPosition(ivec2 ssP, float ssR) { - // Derivation: - // mipLevel = floor(log(ssR / MAX_OFFSET)); - - int mipLevel = clamp(int(floor(log2(ssR))) - LOG_MAX_OFFSET, 0, MAX_MIP_LEVEL); - - vec3 P; - - // We need to divide by 2^mipLevel to read the appropriately scaled coordinate from a MIP-map. - // Manually clamp to the texture size because texelFetch bypasses the texture unit - ivec2 mipP = clamp(ssP >> mipLevel, ivec2(0), (params.screen_size >> mipLevel) - ivec2(1)); - -#ifdef USE_HALF_SIZE - P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel).r; - P.z = -P.z; -#else - if (mipLevel < 1) { - //read from depth buffer - P.z = texelFetch(source_depth, mipP, 0).r; - P.z = P.z * 2.0 - 1.0; - if (params.orthogonal) { - P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near)); - } - P.z = -P.z; - - } else { - //read from mipmaps - P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel - 1).r; - P.z = -P.z; - } -#endif - - // Offset to pixel center - P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z); - - return P; -} - -/** Compute the occlusion due to sample with index \a i about the pixel at \a ssC that corresponds - to camera-space point \a C with unit normal \a n_C, using maximum screen-space sampling radius \a ssDiskRadius - - Note that units of H() in the HPG12 paper are meters, not - unitless. The whole falloff/sampling function is therefore - unitless. In this implementation, we factor out (9 / radius). - - Four versions of the falloff function are implemented below -*/ -float sampleAO(in ivec2 ssC, in vec3 C, in vec3 n_C, in float ssDiskRadius, in float p_radius, in int tapIndex, in float randomPatternRotationAngle) { - // Offset on the unit disk, spun for this pixel - float ssR; - vec2 unitOffset = tapLocation(tapIndex, randomPatternRotationAngle, ssR); - ssR *= ssDiskRadius; - - ivec2 ssP = ivec2(ssR * unitOffset) + ssC; - - if (any(lessThan(ssP, ivec2(0))) || any(greaterThanEqual(ssP, params.screen_size))) { - return 0.0; - } - - // The occluding point in camera space - vec3 Q = getOffsetPosition(ssP, ssR); - - vec3 v = Q - C; - - float vv = dot(v, v); - float vn = dot(v, n_C); - - const float epsilon = 0.01; - float radius2 = p_radius * p_radius; - - // A: From the HPG12 paper - // Note large epsilon to avoid overdarkening within cracks - //return float(vv < radius2) * max((vn - bias) / (epsilon + vv), 0.0) * radius2 * 0.6; - - // B: Smoother transition to zero (lowers contrast, smoothing out corners). [Recommended] - float f = max(radius2 - vv, 0.0); - return f * f * f * max((vn - params.bias) / (epsilon + vv), 0.0); - - // C: Medium contrast (which looks better at high radii), no division. Note that the - // contribution still falls off with radius^2, but we've adjusted the rate in a way that is - // more computationally efficient and happens to be aesthetically pleasing. - // return 4.0 * max(1.0 - vv * invRadius2, 0.0) * max(vn - bias, 0.0); - - // D: Low contrast, no division operation - // return 2.0 * float(vv < radius * radius) * max(vn - bias, 0.0); -} - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - - // World space point being shaded - vec3 C = getPosition(ssC); - -#ifdef USE_HALF_SIZE - vec3 n_C = texelFetch(source_normal, ssC << 1, 0).xyz * 2.0 - 1.0; -#else - vec3 n_C = texelFetch(source_normal, ssC, 0).xyz * 2.0 - 1.0; -#endif - n_C = normalize(n_C); - n_C.y = -n_C.y; //because this code reads flipped - - // Hash function used in the HPG12 AlchemyAO paper - float randomPatternRotationAngle = mod(float((3 * ssC.x ^ ssC.y + ssC.x * ssC.y) * 10), TWO_PI); - - // Reconstruct normals from positions. These will lead to 1-pixel black lines - // at depth discontinuities, however the blur will wipe those out so they are not visible - // in the final image. - - // Choose the screen-space sample radius - // proportional to the projected area of the sphere - - float ssDiskRadius = -params.proj_scale * params.radius; - if (!params.orthogonal) { - ssDiskRadius = -params.proj_scale * params.radius / C.z; - } - float sum = 0.0; - for (int i = 0; i < NUM_SAMPLES; ++i) { - sum += sampleAO(ssC, C, n_C, ssDiskRadius, params.radius, i, randomPatternRotationAngle); - } - - float A = max(0.0, 1.0 - sum * params.intensity_div_r6 * (5.0 / float(NUM_SAMPLES))); - - imageStore(dest_image, ssC, vec4(A)); -} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl deleted file mode 100644 index 3e63e3cb59..0000000000 --- a/servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl +++ /dev/null @@ -1,153 +0,0 @@ -#[compute] - -#version 450 - -VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(set = 0, binding = 0) uniform sampler2D source_ssao; -layout(set = 1, binding = 0) uniform sampler2D source_depth; -#ifdef MODE_UPSCALE -layout(set = 2, binding = 0) uniform sampler2D source_depth_mipmaps; -#endif - -layout(r8, set = 3, binding = 0) uniform restrict writeonly image2D dest_image; - -////////////////////////////////////////////////////////////////////////////////////////////// -// Tunable Parameters: - -layout(push_constant, binding = 1, std430) uniform Params { - float edge_sharpness; /** Increase to make depth edges crisper. Decrease to reduce flicker. */ - int filter_scale; - float z_far; - float z_near; - bool orthogonal; - uint pad0; - uint pad1; - uint pad2; - ivec2 axis; /** (1, 0) or (0, 1) */ - ivec2 screen_size; -} -params; - -/** Filter radius in pixels. This will be multiplied by SCALE. */ -#define R (4) - -////////////////////////////////////////////////////////////////////////////////////////////// - -// Gaussian coefficients -const float gaussian[R + 1] = - //float[](0.356642, 0.239400, 0.072410, 0.009869); - //float[](0.398943, 0.241971, 0.053991, 0.004432, 0.000134); // stddev = 1.0 - float[](0.153170, 0.144893, 0.122649, 0.092902, 0.062970); // stddev = 2.0 -//float[](0.111220, 0.107798, 0.098151, 0.083953, 0.067458, 0.050920, 0.036108); // stddev = 3.0 - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - -#ifdef MODE_UPSCALE - - //closest one should be the same pixel, but check nearby just in case - float depth = texelFetch(source_depth, ssC, 0).r; - - depth = depth * 2.0 - 1.0; - if (params.orthogonal) { - depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); - } - - vec2 pixel_size = 1.0 / vec2(params.screen_size); - vec2 closest_uv = vec2(ssC) * pixel_size + pixel_size * 0.5; - vec2 from_uv = closest_uv; - vec2 ps2 = pixel_size; // * 2.0; - - float closest_depth = abs(textureLod(source_depth_mipmaps, closest_uv, 0.0).r - depth); - - vec2 offsets[4] = vec2[](vec2(ps2.x, 0), vec2(-ps2.x, 0), vec2(0, ps2.y), vec2(0, -ps2.y)); - for (int i = 0; i < 4; i++) { - vec2 neighbour = from_uv + offsets[i]; - float neighbour_depth = abs(textureLod(source_depth_mipmaps, neighbour, 0.0).r - depth); - if (neighbour_depth < closest_depth) { - closest_uv = neighbour; - closest_depth = neighbour_depth; - } - } - - float visibility = textureLod(source_ssao, closest_uv, 0.0).r; - imageStore(dest_image, ssC, vec4(visibility)); -#else - - float depth = texelFetch(source_depth, ssC, 0).r; - -#ifdef MODE_FULL_SIZE - depth = depth * 2.0 - 1.0; - - if (params.orthogonal) { - depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); - } - -#endif - float depth_divide = 1.0 / params.z_far; - - //depth *= depth_divide; - - /* - if (depth > params.z_far * 0.999) { - discard; //skybox - } - */ - - float sum = texelFetch(source_ssao, ssC, 0).r; - - // Base weight for depth falloff. Increase this for more blurriness, - // decrease it for better edge discrimination - float BASE = gaussian[0]; - float totalWeight = BASE; - sum *= totalWeight; - - ivec2 clamp_limit = params.screen_size - ivec2(1); - - for (int r = -R; r <= R; ++r) { - // We already handled the zero case above. This loop should be unrolled and the static branch optimized out, - // so the IF statement has no runtime cost - if (r != 0) { - ivec2 ppos = ssC + params.axis * (r * params.filter_scale); - float value = texelFetch(source_ssao, clamp(ppos, ivec2(0), clamp_limit), 0).r; - ivec2 rpos = clamp(ppos, ivec2(0), clamp_limit); - - float temp_depth = texelFetch(source_depth, rpos, 0).r; -#ifdef MODE_FULL_SIZE - temp_depth = temp_depth * 2.0 - 1.0; - if (params.orthogonal) { - temp_depth = ((temp_depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - temp_depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - temp_depth * (params.z_far - params.z_near)); - } - //temp_depth *= depth_divide; -#endif - // spatial domain: offset gaussian tap - float weight = 0.3 + gaussian[abs(r)]; - //weight *= max(0.0, dot(temp_normal, normal)); - - // range domain (the "bilateral" weight). As depth difference increases, decrease weight. - weight *= max(0.0, 1.0 - params.edge_sharpness * abs(temp_depth - depth)); - - sum += value * weight; - totalWeight += weight; - } - } - - const float epsilon = 0.0001; - float visibility = sum / (totalWeight + epsilon); - - imageStore(dest_image, ssC, vec4(visibility)); -#endif -} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl new file mode 100644 index 0000000000..3bfce1a827 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/ssao_downsample1.glsl @@ -0,0 +1,77 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +// Developed by Minigraph +// +// Author: James Stanard +// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 1, std430) uniform Params { + float z_far; + float z_near; + bool orthogonal; + uint pad; +} +params; + +layout(set = 0, binding = 0) uniform sampler2D source_depth; + +layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2D linear_z; +layout(r32f, set = 2, binding = 0) uniform restrict writeonly image2D downsampled2x; +layout(r16f, set = 3, binding = 0) uniform restrict writeonly image2DArray downsampled2x_atlas; +layout(r32f, set = 4, binding = 0) uniform restrict writeonly image2D downsampled4x; +layout(r16f, set = 5, binding = 0) uniform restrict writeonly image2DArray downsampled4x_atlas; + +float Linearize(uvec2 p_pos) { + float depth = texelFetch(source_depth, ivec2(p_pos), 0).r * 2.0 - 1.0; + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / (2.0 * params.z_far); + } else { + depth = 2.0 * params.z_near / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + imageStore(linear_z, ivec2(p_pos), vec4(depth)); + return depth; +} + +shared float local_cache[256]; + +void main() { + uvec2 start = gl_WorkGroupID.xy << 4 | gl_LocalInvocationID.xy; + uint dest_index = gl_LocalInvocationID.y << 4 | gl_LocalInvocationID.x; + local_cache[dest_index + 0] = Linearize(start | uvec2(0, 0)); + local_cache[dest_index + 8] = Linearize(start | uvec2(8, 0)); + local_cache[dest_index + 128] = Linearize(start | uvec2(0, 8)); + local_cache[dest_index + 136] = Linearize(start | uvec2(8, 8)); + + groupMemoryBarrier(); + barrier(); + + uint index = (gl_LocalInvocationID.x << 1) | (gl_LocalInvocationID.y << 5); + + float w1 = local_cache[index]; + + uvec2 pos = gl_GlobalInvocationID.xy; + uint slice = (pos.x & 3) | ((pos.y & 3) << 2); + imageStore(downsampled2x, ivec2(pos), vec4(w1)); + imageStore(downsampled2x_atlas, ivec3(pos >> 2, slice), vec4(w1)); + + if ((gl_LocalInvocationIndex & 011) == 0) { + pos = gl_GlobalInvocationID.xy >> 1; + slice = (pos.x & 3) | ((pos.y & 3) << 2); + imageStore(downsampled4x, ivec2(pos), vec4(w1)); + imageStore(downsampled4x_atlas, ivec3(pos >> 2, slice), vec4(w1)); + } +} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl new file mode 100644 index 0000000000..9fec881057 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/ssao_downsample2.glsl @@ -0,0 +1,49 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +// Developed by Minigraph +// +// Author: James Stanard +// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D downsampled4x; +layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D downsampled8x; +layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray downsampled8x_atlas; +layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D downsampled16x; +layout(r16f, set = 4, binding = 0) uniform restrict writeonly image2DArray downsampled16x_atlas; + +void main() { + vec4 w1 = imageLoad(downsampled4x, min(ivec2(gl_GlobalInvocationID.xy << 1), imageSize(downsampled4x) - ivec2(2))); + + uvec2 pos = gl_GlobalInvocationID.xy; + uvec2 pos_atlas = pos >> 2; + uint pos_slice = (pos.x & 3) | ((pos.y & 3) << 2); + ivec2 ds8s = imageSize(downsampled8x); + + if (pos.x < ds8s.x && pos.y < ds8s.y) { + imageStore(downsampled8x, ivec2(pos), w1); + } + + imageStore(downsampled8x_atlas, ivec3(pos_atlas, pos_slice), w1); + + if ((gl_LocalInvocationIndex & 011) == 0) { + uvec2 pos = gl_GlobalInvocationID.xy >> 1; + uvec2 pos_atlas = pos >> 2; + uint pos_slice = (pos.x & 3) | ((pos.y & 3) << 2); + imageStore(downsampled16x, ivec2(pos), w1); + imageStore(downsampled16x_atlas, ivec3(pos_atlas, pos_slice), w1); + } +} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl deleted file mode 100644 index 263fca386f..0000000000 --- a/servers/rendering/rasterizer_rd/shaders/ssao_minify.glsl +++ /dev/null @@ -1,45 +0,0 @@ -#[compute] - -#version 450 - -VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(push_constant, binding = 1, std430) uniform Params { - vec2 pixel_size; - float z_far; - float z_near; - ivec2 source_size; - bool orthogonal; - uint pad; -} -params; - -#ifdef MINIFY_START -layout(set = 0, binding = 0) uniform sampler2D source_texture; -#else -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_image; -#endif -layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - - if (any(greaterThan(pos, params.source_size >> 1))) { //too large, do nothing - return; - } - -#ifdef MINIFY_START - float depth = texelFetch(source_texture, pos << 1, 0).r * 2.0 - 1.0; - if (params.orthogonal) { - depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); - } -#else - float depth = imageLoad(source_image, pos << 1).r; -#endif - - imageStore(dest_image, pos, vec4(depth)); -} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_render.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_render.glsl new file mode 100644 index 0000000000..42eb49c9fe --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/ssao_render.glsl @@ -0,0 +1,159 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +// Developed by Minigraph +// +// Author: James Stanard +// + +#[compute] + +#version 450 + +VERSION_DEFINES + +#ifndef INTERLEAVE_RESULT +#define WIDE_SAMPLING 1 +#endif + +#if WIDE_SAMPLING +// 32x32 cache size: the 16x16 in the center forms the area of focus with the 8-pixel perimeter used for wide gathering. +#define TILE_DIM 32 +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; +#else +// 16x16 cache size: the 8x8 in the center forms the area of focus with the 4-pixel perimeter used for gathering. +#define TILE_DIM 16 +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +#endif + +#ifdef INTERLEAVE_RESULT +layout(set = 0, binding = 0) uniform sampler2DArray depth_texture; +#else +layout(set = 0, binding = 0) uniform sampler2D depth_texture; +#endif + +layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D occlusion; +//SamplerState LinearBorderSampler : register(s1); + +layout(push_constant, binding = 1, std430) uniform Params { + vec4 inv_thickness_table[3]; + vec4 sample_weight_table[3]; + vec2 texel_size; + float rejection_fadeoff; + float intensity; +} +params; + +shared float depth_samples[TILE_DIM * TILE_DIM]; + +float test_sample_pair(float front_depth, float inv_range, uint p_base, uint p_offset) { + // "Disocclusion" measures the penetration distance of the depth sample within the sphere. + // Disocclusion < 0 (full occlusion) -> the sample fell in front of the sphere + // Disocclusion > 1 (no occlusion) -> the sample fell behind the sphere + float disocclusion1 = depth_samples[p_base + p_offset] * inv_range - front_depth; + float disocclusion2 = depth_samples[p_base - p_offset] * inv_range - front_depth; + + float pseudo_disocclusion1 = clamp(params.rejection_fadeoff * disocclusion1, 0.0, 1.0); + float pseudo_disocclusion2 = clamp(params.rejection_fadeoff * disocclusion2, 0.0, 1.0); + + return clamp(disocclusion1, pseudo_disocclusion2, 1.0) + + clamp(disocclusion2, pseudo_disocclusion1, 1.0) - + pseudo_disocclusion1 * pseudo_disocclusion2; +} + +float test_samples(uint p_center_index, uint p_x, uint p_y, float p_inv_depth, float p_inv_thickness) { +#if WIDE_SAMPLING + p_x <<= 1; + p_y <<= 1; +#endif + + float inv_range = p_inv_thickness * p_inv_depth; + float front_depth = p_inv_thickness - 0.5; + + if (p_y == 0) { + // Axial + return 0.5 * (test_sample_pair(front_depth, inv_range, p_center_index, p_x) + + test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM)); + } else if (p_x == p_y) { + // Diagonal + return 0.5 * (test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM - p_x) + + test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM + p_x)); + } else { + // L-Shaped + return 0.25 * (test_sample_pair(front_depth, inv_range, p_center_index, p_y * TILE_DIM + p_x) + + test_sample_pair(front_depth, inv_range, p_center_index, p_y * TILE_DIM - p_x) + + test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM + p_y) + + test_sample_pair(front_depth, inv_range, p_center_index, p_x * TILE_DIM - p_y)); + } +} + +void main() { +#if WIDE_SAMPLING + vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 7.5) * params.texel_size, vec2(params.texel_size * 0.5), vec2(1.0 - params.texel_size * 0.5)); +#else + vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) * params.texel_size, vec2(params.texel_size * 0.5), vec2(1.0 - params.texel_size * 0.5)); +#endif + + // Fetch four depths and store them in LDS +#ifdef INTERLEAVE_RESULT + vec4 depths = textureGather(depth_texture, vec3(quad_center_uv, gl_GlobalInvocationID.z)); // textureGather +#else + vec4 depths = textureGather(depth_texture, quad_center_uv); +#endif + + uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * TILE_DIM; + depth_samples[dest_index] = depths.w; + depth_samples[dest_index + 1] = depths.z; + depth_samples[dest_index + TILE_DIM] = depths.x; + depth_samples[dest_index + TILE_DIM + 1] = depths.y; + + groupMemoryBarrier(); + barrier(); + +#if WIDE_SAMPLING + uint index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * TILE_DIM + 8 * TILE_DIM + 8; +#else + uint index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * TILE_DIM + 4 * TILE_DIM + 4; +#endif + const float inv_depth = 1.0 / depth_samples[index]; + + float ao = 0.0; + + if (params.sample_weight_table[0].x > 0.0) { + // 68 samples: sample all cells in *within* a circular radius of 5 + ao += params.sample_weight_table[0].x * test_samples(index, 1, 0, inv_depth, params.inv_thickness_table[0].x); + ao += params.sample_weight_table[0].y * test_samples(index, 2, 0, inv_depth, params.inv_thickness_table[0].y); + ao += params.sample_weight_table[0].z * test_samples(index, 3, 0, inv_depth, params.inv_thickness_table[0].z); + ao += params.sample_weight_table[0].w * test_samples(index, 4, 0, inv_depth, params.inv_thickness_table[0].w); + ao += params.sample_weight_table[1].x * test_samples(index, 1, 1, inv_depth, params.inv_thickness_table[1].x); + ao += params.sample_weight_table[2].x * test_samples(index, 2, 2, inv_depth, params.inv_thickness_table[2].x); + ao += params.sample_weight_table[2].w * test_samples(index, 3, 3, inv_depth, params.inv_thickness_table[2].w); + ao += params.sample_weight_table[1].y * test_samples(index, 1, 2, inv_depth, params.inv_thickness_table[1].y); + ao += params.sample_weight_table[1].z * test_samples(index, 1, 3, inv_depth, params.inv_thickness_table[1].z); + ao += params.sample_weight_table[1].w * test_samples(index, 1, 4, inv_depth, params.inv_thickness_table[1].w); + ao += params.sample_weight_table[2].y * test_samples(index, 2, 3, inv_depth, params.inv_thickness_table[2].y); + ao += params.sample_weight_table[2].z * test_samples(index, 2, 4, inv_depth, params.inv_thickness_table[2].z); + } else { + // SAMPLE_CHECKER + // 36 samples: sample every-other cell in a checker board pattern + ao += params.sample_weight_table[0].y * test_samples(index, 2, 0, inv_depth, params.inv_thickness_table[0].y); + ao += params.sample_weight_table[0].w * test_samples(index, 4, 0, inv_depth, params.inv_thickness_table[0].w); + ao += params.sample_weight_table[1].x * test_samples(index, 1, 1, inv_depth, params.inv_thickness_table[1].x); + ao += params.sample_weight_table[2].x * test_samples(index, 2, 2, inv_depth, params.inv_thickness_table[2].x); + ao += params.sample_weight_table[2].w * test_samples(index, 3, 3, inv_depth, params.inv_thickness_table[2].w); + ao += params.sample_weight_table[1].z * test_samples(index, 1, 3, inv_depth, params.inv_thickness_table[1].z); + ao += params.sample_weight_table[2].z * test_samples(index, 2, 4, inv_depth, params.inv_thickness_table[2].z); + } + +#ifdef INTERLEAVE_RESULT + uvec2 out_pixel = gl_GlobalInvocationID.xy << 2 | uvec2(gl_GlobalInvocationID.z & 3, gl_GlobalInvocationID.z >> 2); +#else + uvec2 out_pixel = gl_GlobalInvocationID.xy; +#endif + imageStore(occlusion, ivec2(out_pixel), vec4(mix(1.0, ao, params.intensity))); +} diff --git a/servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl b/servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl new file mode 100644 index 0000000000..e91e4a9bd8 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/ssao_upsample.glsl @@ -0,0 +1,216 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +// Developed by Minigraph +// +// Author: James Stanard +// + +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2D low_res_depth; +layout(set = 1, binding = 0) uniform sampler2D high_res_depth; +layout(set = 2, binding = 0) uniform sampler2D low_res_ao1; +layout(r8, set = 3, binding = 0) uniform restrict writeonly image2D ao_result; +#ifdef COMBINE_LOWER_RESOLUTIONS +layout(set = 4, binding = 0) uniform sampler2D low_res_ao2; +#endif +#ifdef BLEND_WITH_HIGHER_RESOLUTION +layout(set = 5, binding = 0) uniform sampler2D high_res_ao; +#endif + +//SamplerState LinearSampler : register(s0); + +layout(push_constant, binding = 1, std430) uniform Params { + vec2 inv_low_resolution; + vec2 inv_high_resolution; + float noise_filter_strength; + float step_size; + float blur_tolerance; + float upsample_tolerance; +} +params; + +shared float depth_cache[256]; +shared float ao_cache1[256]; +shared float ao_cache2[256]; + +void prefetch_data(uint p_index, vec2 p_uv) { + vec4 ao1 = textureGather(low_res_ao1, p_uv); // textureGather + +#ifdef COMBINE_LOWER_RESOLUTIONS + ao1 = min(ao1, textureGather(low_res_ao2, p_uv)); +#endif + + ao_cache1[p_index] = ao1.w; + ao_cache1[p_index + 1] = ao1.z; + ao_cache1[p_index + 16] = ao1.x; + ao_cache1[p_index + 17] = ao1.y; + + vec4 ID = 1.0 / textureGather(low_res_depth, p_uv); + depth_cache[p_index] = ID.w; + depth_cache[p_index + 1] = ID.z; + depth_cache[p_index + 16] = ID.x; + depth_cache[p_index + 17] = ID.y; +} + +float smart_blur(float p_a, float p_b, float p_c, float p_d, float p_e, bool p_left, bool p_middle, bool p_right) { + p_b = p_left || p_middle ? p_b : p_c; + p_a = p_left ? p_a : p_b; + p_d = p_right || p_middle ? p_d : p_c; + p_e = p_right ? p_e : p_d; + return ((p_a + p_e) / 2.0 + p_b + p_c + p_d) / 4.0; +} + +bool compare_deltas(float p_d1, float p_d2, float p_l1, float p_l2) { + float temp = p_d1 * p_d2 + params.step_size; + return temp * temp > p_l1 * p_l2 * params.blur_tolerance; +} + +void blur_horizontally(uint p_left_most_index) { + float a0 = ao_cache1[p_left_most_index]; + float a1 = ao_cache1[p_left_most_index + 1]; + float a2 = ao_cache1[p_left_most_index + 2]; + float a3 = ao_cache1[p_left_most_index + 3]; + float a4 = ao_cache1[p_left_most_index + 4]; + float a5 = ao_cache1[p_left_most_index + 5]; + float a6 = ao_cache1[p_left_most_index + 6]; + + float d0 = depth_cache[p_left_most_index]; + float d1 = depth_cache[p_left_most_index + 1]; + float d2 = depth_cache[p_left_most_index + 2]; + float d3 = depth_cache[p_left_most_index + 3]; + float d4 = depth_cache[p_left_most_index + 4]; + float d5 = depth_cache[p_left_most_index + 5]; + float d6 = depth_cache[p_left_most_index + 6]; + + float d01 = d1 - d0; + float d12 = d2 - d1; + float d23 = d3 - d2; + float d34 = d4 - d3; + float d45 = d5 - d4; + float d56 = d6 - d5; + + float l01 = d01 * d01 + params.step_size; + float l12 = d12 * d12 + params.step_size; + float l23 = d23 * d23 + params.step_size; + float l34 = d34 * d34 + params.step_size; + float l45 = d45 * d45 + params.step_size; + float l56 = d56 * d56 + params.step_size; + + bool c02 = compare_deltas(d01, d12, l01, l12); + bool c13 = compare_deltas(d12, d23, l12, l23); + bool c24 = compare_deltas(d23, d34, l23, l34); + bool c35 = compare_deltas(d34, d45, l34, l45); + bool c46 = compare_deltas(d45, d56, l45, l56); + + ao_cache2[p_left_most_index] = smart_blur(a0, a1, a2, a3, a4, c02, c13, c24); + ao_cache2[p_left_most_index + 1] = smart_blur(a1, a2, a3, a4, a5, c13, c24, c35); + ao_cache2[p_left_most_index + 2] = smart_blur(a2, a3, a4, a5, a6, c24, c35, c46); +} + +void blur_vertically(uint p_top_most_index) { + float a0 = ao_cache2[p_top_most_index]; + float a1 = ao_cache2[p_top_most_index + 16]; + float a2 = ao_cache2[p_top_most_index + 32]; + float a3 = ao_cache2[p_top_most_index + 48]; + float a4 = ao_cache2[p_top_most_index + 64]; + float a5 = ao_cache2[p_top_most_index + 80]; + + float d0 = depth_cache[p_top_most_index + 2]; + float d1 = depth_cache[p_top_most_index + 18]; + float d2 = depth_cache[p_top_most_index + 34]; + float d3 = depth_cache[p_top_most_index + 50]; + float d4 = depth_cache[p_top_most_index + 66]; + float d5 = depth_cache[p_top_most_index + 82]; + + float d01 = d1 - d0; + float d12 = d2 - d1; + float d23 = d3 - d2; + float d34 = d4 - d3; + float d45 = d5 - d4; + + float l01 = d01 * d01 + params.step_size; + float l12 = d12 * d12 + params.step_size; + float l23 = d23 * d23 + params.step_size; + float l34 = d34 * d34 + params.step_size; + float l45 = d45 * d45 + params.step_size; + + bool c02 = compare_deltas(d01, d12, l01, l12); + bool c13 = compare_deltas(d12, d23, l12, l23); + bool c24 = compare_deltas(d23, d34, l23, l34); + bool c35 = compare_deltas(d34, d45, l34, l45); + + float ao_result1 = smart_blur(a0, a1, a2, a3, a4, c02, c13, c24); + float ao_result2 = smart_blur(a1, a2, a3, a4, a5, c13, c24, c35); + + ao_cache1[p_top_most_index] = ao_result1; + ao_cache1[p_top_most_index + 16] = ao_result2; +} + +// We essentially want 5 weights: 4 for each low-res pixel and 1 to blend in when none of the 4 really +// match. The filter strength is 1 / DeltaZTolerance. So a tolerance of 0.01 would yield a strength of 100. +// Note that a perfect match of low to high depths would yield a weight of 10^6, completely superceding any +// noise filtering. The noise filter is intended to soften the effects of shimmering when the high-res depth +// buffer has a lot of small holes in it causing the low-res depth buffer to inaccurately represent it. +float bilateral_upsample(float p_high_depth, float p_high_ao, vec4 p_low_depths, vec4 p_low_ao) { + vec4 weights = vec4(9.0, 3.0, 1.0, 3.0) / (abs(p_high_depth - p_low_depths) + params.upsample_tolerance); + float total_weight = dot(weights, vec4(1.0)) + params.noise_filter_strength; + float weighted_sum = dot(p_low_ao, weights) + params.noise_filter_strength; + return p_high_ao * weighted_sum / total_weight; +} + +void main() { + // Load 4 pixels per thread into LDS to fill the 16x16 LDS cache with depth and AO + prefetch_data(gl_LocalInvocationID.x << 1 | gl_LocalInvocationID.y << 5, vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 2.5) * params.inv_low_resolution); + groupMemoryBarrier(); + barrier(); + + // Goal: End up with a 9x9 patch that is blurred so we can upsample. Blur radius is 2 pixels, so start with 13x13 area. + + // Horizontally blur the pixels. 13x13 -> 9x13 + if (gl_LocalInvocationIndex < 39) + blur_horizontally((gl_LocalInvocationIndex / 3) * 16 + (gl_LocalInvocationIndex % 3) * 3); + groupMemoryBarrier(); + barrier(); + + // Vertically blur the pixels. 9x13 -> 9x9 + if (gl_LocalInvocationIndex < 45) + blur_vertically((gl_LocalInvocationIndex / 9) * 32 + gl_LocalInvocationIndex % 9); + groupMemoryBarrier(); + barrier(); + + // Bilateral upsample + uint index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 16; + vec4 low_SSAOs = vec4(ao_cache1[index + 16], ao_cache1[index + 17], ao_cache1[index + 1], ao_cache1[index]); + + // We work on a quad of pixels at once because then we can gather 4 each of high and low-res depth values + vec2 UV0 = (gl_GlobalInvocationID.xy - 0.5) * params.inv_low_resolution; + vec2 UV1 = (gl_GlobalInvocationID.xy * 2.0 - 0.5) * params.inv_high_resolution; + +#ifdef BLEND_WITH_HIGHER_RESOLUTION + vec4 hi_SSAOs = textureGather(high_res_ao, UV1); +#else + vec4 hi_SSAOs = vec4(1.0); +#endif + vec4 Low_depths = textureGather(low_res_depth, UV0); + vec4 high_depths = textureGather(high_res_depth, UV1); + + ivec2 OutST = ivec2(gl_GlobalInvocationID.xy << 1); + + imageStore(ao_result, OutST + ivec2(-1, 0), vec4(bilateral_upsample(high_depths.x, hi_SSAOs.x, Low_depths.xyzw, low_SSAOs.xyzw))); + imageStore(ao_result, OutST + ivec2(0, 0), vec4(bilateral_upsample(high_depths.y, hi_SSAOs.y, Low_depths.yzwx, low_SSAOs.yzwx))); + imageStore(ao_result, OutST + ivec2(0, -1), vec4(bilateral_upsample(high_depths.z, hi_SSAOs.z, Low_depths.zwxy, low_SSAOs.zwxy))); + imageStore(ao_result, OutST + ivec2(-1, -1), vec4(bilateral_upsample(high_depths.w, hi_SSAOs.w, Low_depths.wxyz, low_SSAOs.wxyz))); +} diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 6df66e7b20..2573e6e6dd 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -433,6 +433,7 @@ public: TEXTURE_SLICE_2D, TEXTURE_SLICE_CUBEMAP, TEXTURE_SLICE_3D, + TEXTURE_SLICE_2D_ARRAY, }; virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, TextureSliceType p_slice_type = TEXTURE_SLICE_2D) = 0; diff --git a/servers/rendering/rendering_server_raster.h b/servers/rendering/rendering_server_raster.h index 04c16cfd4a..3df8c4ed33 100644 --- a/servers/rendering/rendering_server_raster.h +++ b/servers/rendering/rendering_server_raster.h @@ -578,8 +578,8 @@ public: BIND6(environment_set_ssr, RID, bool, int, float, float, float) BIND1(environment_set_ssr_roughness_quality, EnvironmentSSRRoughnessQuality) - BIND9(environment_set_ssao, RID, bool, float, float, float, float, float, EnvironmentSSAOBlur, float) - BIND2(environment_set_ssao_quality, EnvironmentSSAOQuality, bool) + BIND7(environment_set_ssao, RID, bool, float, float, int, float, float) + BIND5(environment_set_ssao_settings, EnvironmentSSAOQuality, bool, float, float, float) BIND11(environment_set_glow, RID, bool, int, float, float, float, float, EnvironmentGlowBlendMode, float, float, float) BIND1(environment_glow_set_use_bicubic_upscale, bool) diff --git a/servers/rendering/rendering_server_wrap_mt.h b/servers/rendering/rendering_server_wrap_mt.h index c490be4a6b..998a2366ef 100644 --- a/servers/rendering/rendering_server_wrap_mt.h +++ b/servers/rendering/rendering_server_wrap_mt.h @@ -484,9 +484,9 @@ public: FUNC6(environment_set_ssr, RID, bool, int, float, float, float) FUNC1(environment_set_ssr_roughness_quality, EnvironmentSSRRoughnessQuality) - FUNC9(environment_set_ssao, RID, bool, float, float, float, float, float, EnvironmentSSAOBlur, float) + FUNC7(environment_set_ssao, RID, bool, float, float, int, float, float) - FUNC2(environment_set_ssao_quality, EnvironmentSSAOQuality, bool) + FUNC5(environment_set_ssao_settings, EnvironmentSSAOQuality, bool, float, float, float) FUNC11(environment_set_sdfgi, RID, bool, EnvironmentSDFGICascades, float, EnvironmentSDFGIYScale, bool, bool, bool, float, float, float) FUNC1(environment_set_sdfgi_ray_count, EnvironmentSDFGIRayCount) |