diff options
Diffstat (limited to 'servers')
35 files changed, 8260 insertions, 937 deletions
diff --git a/servers/rendering/rasterizer.h b/servers/rendering/rasterizer.h index 026725bf01..1027034902 100644 --- a/servers/rendering/rasterizer.h +++ b/servers/rendering/rasterizer.h @@ -50,6 +50,16 @@ public:  	virtual int get_directional_light_shadow_size(RID p_light_intance) = 0;  	virtual void set_directional_shadow_count(int p_count) = 0; +	/* SDFGI UPDATE */ + +	struct InstanceBase; + +	virtual void sdfgi_update(RID p_render_buffers, RID p_environment, const Vector3 &p_world_position) = 0; +	virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const = 0; +	virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const = 0; +	virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const = 0; +	virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const RID *p_directional_light_instances, uint32_t p_directional_light_count, const RID *p_positional_light_instances, uint32_t p_positional_light_count) = 0; +  	/* SKY API */  	virtual RID sky_create() = 0; @@ -86,6 +96,11 @@ public:  	virtual void environment_set_ssao_quality(RS::EnvironmentSSAOQuality p_quality, bool p_half_size) = 0; +	virtual void environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, bool p_enhance_ssr, float p_energy, float p_normal_bias, float p_probe_bias) = 0; + +	virtual void environment_set_sdfgi_ray_count(RS::EnvironmentSDFGIRayCount p_ray_count) = 0; +	virtual void environment_set_sdfgi_frames_to_converge(RS::EnvironmentSDFGIFramesToConverge p_frames) = 0; +  	virtual void environment_set_tonemap(RID p_env, RS::EnvironmentToneMapper p_tone_mapper, float p_exposure, float p_white, bool p_auto_exposure, float p_min_luminance, float p_max_luminance, float p_auto_exp_speed, float p_auto_exp_scale) = 0;  	virtual void environment_set_adjustment(RID 
p_env, bool p_enable, float p_brightness, float p_contrast, float p_saturation, RID p_ramp) = 0; @@ -111,8 +126,6 @@ public:  	virtual void shadows_quality_set(RS::ShadowQuality p_quality) = 0;  	virtual void directional_shadow_quality_set(RS::ShadowQuality p_quality) = 0; -	struct InstanceBase; -  	struct InstanceDependency {  		void instance_notify_changed(bool p_aabb, bool p_dependencies);  		void instance_notify_deleted(RID p_deleted); @@ -248,6 +261,7 @@ public:  	virtual RID light_instance_create(RID p_light) = 0;  	virtual void light_instance_set_transform(RID p_light_instance, const Transform &p_transform) = 0; +	virtual void light_instance_set_aabb(RID p_light_instance, const AABB &p_aabb) = 0;  	virtual void light_instance_set_shadow_transform(RID p_light_instance, const CameraMatrix &p_projection, const Transform &p_transform, float p_far, float p_split, int p_pass, float p_shadow_texel_size, float p_bias_scale = 1.0, float p_range_begin = 0, const Vector2 &p_uv_scale = Vector2()) = 0;  	virtual void light_instance_mark_visible(RID p_light_instance) = 0;  	virtual bool light_instances_can_render_shadow_cube() const { @@ -273,10 +287,14 @@ public:  	virtual bool gi_probe_needs_update(RID p_probe) const = 0;  	virtual void gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, int p_dynamic_object_count, InstanceBase **p_dynamic_objects) = 0; +	virtual void gi_probe_set_quality(RS::GIProbeQuality) = 0; +  	virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID *p_light_cull_result, int p_light_cull_count, RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, RID *p_decal_cull_result, int p_decal_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID 
p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass) = 0;  	virtual void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, InstanceBase **p_cull_result, int p_cull_count) = 0;  	virtual void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) = 0; +	virtual void render_sdfgi(RID p_render_buffers, int p_region, InstanceBase **p_cull_result, int p_cull_count) = 0; +	virtual void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const RID **p_positional_light_cull_result, const uint32_t *p_positional_light_cull_count) = 0;  	virtual void set_scene_pass(uint64_t p_pass) = 0;  	virtual void set_time(double p_time, double p_step) = 0; @@ -285,7 +303,7 @@ public:  	virtual RID render_buffers_create() = 0;  	virtual void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_width, int p_height, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa) = 0; -	virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_curve) = 0; +	virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_limit) = 0;  	virtual bool screen_space_roughness_limiter_is_active() const = 0;  	virtual void sub_surface_scattering_set_quality(RS::SubSurfaceScatteringQuality p_quality) = 0; @@ -295,6 +313,8 @@ public:  	virtual bool free(RID p_rid) = 0; +	virtual void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir) = 0; +  	virtual void update() = 0;  	virtual ~RasterizerScene() {}  }; @@ -484,7 +504,8 @@ public:  	virtual void light_set_negative(RID p_light, bool p_enable) = 0;  	virtual void light_set_cull_mask(RID p_light, uint32_t p_mask) = 0;  	virtual void light_set_reverse_cull_face_mode(RID p_light, bool 
p_enabled) = 0; -	virtual void light_set_use_gi(RID p_light, bool p_enable) = 0; +	virtual void light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) = 0; +	virtual void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) = 0;  	virtual void light_omni_set_shadow_mode(RID p_light, RS::LightOmniShadowMode p_mode) = 0; @@ -503,7 +524,8 @@ public:  	virtual AABB light_get_aabb(RID p_light) const = 0;  	virtual float light_get_param(RID p_light, RS::LightParam p_param) = 0;  	virtual Color light_get_color(RID p_light) = 0; -	virtual bool light_get_use_gi(RID p_light) = 0; +	virtual RS::LightBakeMode light_get_bake_mode(RID p_light) = 0; +	virtual uint32_t light_get_max_sdfgi_cascade(RID p_light) = 0;  	virtual uint64_t light_get_version(RID p_light) const = 0;  	/* PROBE API */ @@ -513,9 +535,9 @@ public:  	virtual void reflection_probe_set_update_mode(RID p_probe, RS::ReflectionProbeUpdateMode p_mode) = 0;  	virtual void reflection_probe_set_resolution(RID p_probe, int p_resolution) = 0;  	virtual void reflection_probe_set_intensity(RID p_probe, float p_intensity) = 0; -	virtual void reflection_probe_set_interior_ambient(RID p_probe, const Color &p_ambient) = 0; -	virtual void reflection_probe_set_interior_ambient_energy(RID p_probe, float p_energy) = 0; -	virtual void reflection_probe_set_interior_ambient_probe_contribution(RID p_probe, float p_contrib) = 0; +	virtual void reflection_probe_set_ambient_mode(RID p_probe, RS::ReflectionProbeAmbientMode p_mode) = 0; +	virtual void reflection_probe_set_ambient_color(RID p_probe, const Color &p_color) = 0; +	virtual void reflection_probe_set_ambient_energy(RID p_probe, float p_energy) = 0;  	virtual void reflection_probe_set_max_distance(RID p_probe, float p_distance) = 0;  	virtual void reflection_probe_set_extents(RID p_probe, const Vector3 &p_extents) = 0;  	virtual void reflection_probe_set_origin_offset(RID p_probe, const Vector3 &p_offset) = 0; diff --git 
a/servers/rendering/rasterizer_rd/rasterizer_canvas_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_canvas_rd.cpp index 4c477ca5f4..aad2be45c6 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_canvas_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_canvas_rd.cpp @@ -2497,7 +2497,7 @@ RasterizerCanvasRD::~RasterizerCanvasRD() {  		_dispose_bindings();  		//anything remains?  		if (bindings.texture_bindings.size()) { -			ERR_PRINT("Some texture bindings were not properly freed (leaked canvasitems?"); +			ERR_PRINT("Some texture bindings were not properly freed (leaked CanvasItems?)");  			const TextureBindingID *key = nullptr;  			while ((key = bindings.texture_bindings.next(key))) {  				TextureBinding *tb = bindings.texture_bindings[*key]; diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp index 303cb7ad42..3f594ab264 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp @@ -218,7 +218,7 @@ void RasterizerEffectsRD::copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_f  	RD::get_singleton()->draw_list_draw(draw_list, true);  } -void RasterizerEffectsRD::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y, bool p_force_luminance, bool p_alpha_to_zero) { +void RasterizerEffectsRD::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y, bool p_force_luminance, bool p_alpha_to_zero, bool p_srgb, RID p_secondary) {  	zeromem(©_to_fb.push_constant, sizeof(CopyToFbPushConstant));  	if (p_flip_y) { @@ -230,10 +230,16 @@ void RasterizerEffectsRD::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_fr  	if (p_alpha_to_zero) {  		copy_to_fb.push_constant.alpha_to_zero = true;  	} +	if (p_srgb) { +		copy_to_fb.push_constant.srgb = true; +	}  	RD::DrawListID draw_list = 
RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, p_rect); -	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, copy_to_fb.pipelines[COPY_TO_FB_COPY].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); +	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, copy_to_fb.pipelines[p_secondary.is_valid() ? COPY_TO_FB_COPY2 : COPY_TO_FB_COPY].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer)));  	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_source_rd_texture), 0); +	if (p_secondary.is_valid()) { +		RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_secondary), 1); +	}  	RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array);  	RD::get_singleton()->draw_list_set_push_constant(draw_list, ©_to_fb.push_constant, sizeof(CopyToFbPushConstant));  	RD::get_singleton()->draw_list_draw(draw_list, true); @@ -434,7 +440,7 @@ void RasterizerEffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_texture,  	RD::get_singleton()->compute_list_end();  } -void RasterizerEffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_roughness, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { +void RasterizerEffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color 
&p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) {  	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();  	int32_t x_groups = (p_screen_size.width - 1) / 8 + 1; @@ -451,7 +457,7 @@ void RasterizerEffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal, R  		RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_scale.pipeline);  		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_diffuse), 0); -		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_depth, p_normal), 1); +		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_depth, p_normal_roughness), 1);  		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output_blur), 2);  		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_scale_depth, p_scale_normal), 3); @@ -491,7 +497,7 @@ void RasterizerEffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal, R  		if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) {  			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output, p_blur_radius), 1); -			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_metallic, p_roughness), 3); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_metallic, p_normal_roughness), 3);  		} else {  			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output), 1);  			
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_metallic), 3); @@ -1201,6 +1207,28 @@ void RasterizerEffectsRD::render_sky(RD::DrawListID p_list, float p_time, RID p_  	RD::get_singleton()->draw_list_draw(draw_list, true);  } +void RasterizerEffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples) { +	ResolvePushConstant push_constant; +	push_constant.screen_size[0] = p_screen_size.x; +	push_constant.screen_size[1] = p_screen_size.y; +	push_constant.samples = p_samples; + +	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); +	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, resolve.pipelines[p_source_giprobe.is_valid() ? RESOLVE_MODE_GI_GIPROBE : RESOLVE_MODE_GI]); +	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_source_depth, p_source_normal_roughness), 0); +	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_dest_depth, p_dest_normal_roughness), 1); +	if (p_source_giprobe.is_valid()) { +		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_giprobe), 2); +		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_giprobe), 3); +	} + +	RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); + +	RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1, 8, 8, 1); + +	RD::get_singleton()->compute_list_end(); +} +  RasterizerEffectsRD::RasterizerEffectsRD() {  	{ // Initialize copy  		Vector<String> copy_modes; @@ -1228,6 +1256,7 @@ RasterizerEffectsRD::RasterizerEffectsRD() {  		Vector<String> copy_modes;  		
copy_modes.push_back("\n");  		copy_modes.push_back("\n#define MODE_PANORAMA_TO_DP\n"); +		copy_modes.push_back("\n#define MODE_TWO_SOURCES\n");  		copy_to_fb.shader.initialize(copy_modes); @@ -1517,6 +1546,20 @@ RasterizerEffectsRD::RasterizerEffectsRD() {  		}  	} +	{ +		Vector<String> resolve_modes; +		resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n"); +		resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n#define GIPROBE_RESOLVE\n"); + +		resolve.shader.initialize(resolve_modes); + +		resolve.shader_version = resolve.shader.version_create(); + +		for (int i = 0; i < RESOLVE_MODE_MAX; i++) { +			resolve.pipelines[i] = RD::get_singleton()->compute_pipeline_create(resolve.shader.version_get_shader(resolve.shader_version, i)); +		} +	} +  	RD::SamplerState sampler;  	sampler.mag_filter = RD::SAMPLER_FILTER_LINEAR;  	sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; @@ -1569,6 +1612,7 @@ RasterizerEffectsRD::~RasterizerEffectsRD() {  	cubemap_downsampler.shader.version_free(cubemap_downsampler.shader_version);  	filter.shader.version_free(filter.shader_version);  	luminance_reduce.shader.version_free(luminance_reduce.shader_version); +	resolve.shader.version_free(resolve.shader_version);  	roughness.shader.version_free(roughness.shader_version);  	roughness_limiter.shader.version_free(roughness_limiter.shader_version);  	specular_merge.shader.version_free(specular_merge.shader_version); diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h index 8a55d2d13c..80849654de 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h @@ -41,6 +41,7 @@  #include "servers/rendering/rasterizer_rd/shaders/cubemap_filter.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/cubemap_roughness.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/luminance_reduce.glsl.gen.h" +#include 
"servers/rendering/rasterizer_rd/shaders/resolve.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/roughness_limiter.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/screen_space_reflection_filter.glsl.gen.h" @@ -114,6 +115,7 @@ class RasterizerEffectsRD {  	enum CopyToFBMode {  		COPY_TO_FB_COPY,  		COPY_TO_FB_COPY_PANORAMA_TO_DP, +		COPY_TO_FB_COPY2,  		COPY_TO_FB_MAX,  	}; @@ -126,7 +128,8 @@ class RasterizerEffectsRD {  		uint32_t force_luminance;  		uint32_t alpha_to_zero; -		uint32_t pad[2]; +		uint32_t srgb; +		uint32_t pad;  	};  	struct CopyToFb { @@ -512,6 +515,25 @@ class RasterizerEffectsRD {  		RID pipelines[3]; //3 quality levels  	} sss; +	struct ResolvePushConstant { +		int32_t screen_size[2]; +		int32_t samples; +		uint32_t pad; +	}; + +	enum ResolveMode { +		RESOLVE_MODE_GI, +		RESOLVE_MODE_GI_GIPROBE, +		RESOLVE_MODE_MAX +	}; + +	struct Resolve { +		ResolvePushConstant push_constant; +		ResolveShaderRD shader; +		RID shader_version; +		RID pipelines[RESOLVE_MODE_MAX]; //3 quality levels +	} resolve; +  	RID default_sampler;  	RID default_mipmap_sampler;  	RID index_buffer; @@ -544,7 +566,7 @@ class RasterizerEffectsRD {  	RID _get_compute_uniform_set_from_image_pair(RID p_texture, RID p_texture2);  public: -	void copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false, bool p_force_luminance = false, bool p_alpha_to_zero = false); +	void copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false, bool p_force_luminance = false, bool p_alpha_to_zero = false, bool p_srgb = false, RID p_secondary = RID());  	void copy_to_rect(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y = false, bool p_force_luminance = false, bool p_all_source = false, bool p_8_bit_dst = false);  	void copy_cubemap_to_panorama(RID 
p_source_cube, RID p_dest_panorama, const Size2i &p_panorama_size, float p_lod, bool p_is_array);  	void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false); @@ -605,10 +627,12 @@ public:  	void cubemap_filter(RID p_source_cubemap, Vector<RID> p_dest_cubemap, bool p_use_array);  	void render_sky(RD::DrawListID p_list, float p_time, RID p_fb, RID p_samplers, RID p_lights, RenderPipelineVertexFormatCacheRD *p_pipeline, RID p_uniform_set, RID p_texture_set, const CameraMatrix &p_camera, const Basis &p_orientation, float p_multiplier, const Vector3 &p_position); -	void screen_space_reflection(RID p_diffuse, RID p_normal, RS::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_roughness, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera); +	void screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RS::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera);  	void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection);  	void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); +	void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples); +  	
RasterizerEffectsRD();  	~RasterizerEffectsRD();  }; diff --git a/servers/rendering/rasterizer_rd/rasterizer_rd.h b/servers/rendering/rasterizer_rd/rasterizer_rd.h index cb53a531ac..cdcc6bfd73 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_rd.h @@ -74,6 +74,7 @@ public:  	_ALWAYS_INLINE_ uint64_t get_frame_number() const { return frame; }  	_ALWAYS_INLINE_ float get_frame_delta_time() const { return delta; } +	_ALWAYS_INLINE_ double get_total_time() const { return time; }  	static Error is_viable() {  		return OK; diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp index 7d351f249a..890ada019f 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp @@ -268,8 +268,8 @@ void RasterizerSceneHighEndRD::ShaderData::set_code(const String &p_code) {  	blend_state_blend.attachments.push_back(blend_attachment);  	RD::PipelineColorBlendState blend_state_opaque = RD::PipelineColorBlendState::create_disabled(1);  	RD::PipelineColorBlendState blend_state_opaque_specular = RD::PipelineColorBlendState::create_disabled(2); -	RD::PipelineColorBlendState blend_state_depth_normal = RD::PipelineColorBlendState::create_disabled(1); -	RD::PipelineColorBlendState blend_state_depth_normal_roughness = RD::PipelineColorBlendState::create_disabled(2); +	RD::PipelineColorBlendState blend_state_depth_normal_roughness = RD::PipelineColorBlendState::create_disabled(1); +	RD::PipelineColorBlendState blend_state_depth_normal_roughness_giprobe = RD::PipelineColorBlendState::create_disabled(2);  	//update pipelines @@ -310,12 +310,12 @@ void RasterizerSceneHighEndRD::ShaderData::set_code(const String &p_code) {  				RD::PipelineDepthStencilState depth_stencil = depth_stencil_state;  				if (uses_alpha || uses_blend_alpha) { -					if (k == 
SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_VCT_COLOR_PASS || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS) { +					if (k == SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS) {  						blend_state = blend_state_blend;  						if (depth_draw == DEPTH_DRAW_OPAQUE) {  							depth_stencil.enable_depth_write = false; //alpha does not draw depth  						} -					} else if (uses_depth_pre_pass && (k == SHADER_VERSION_DEPTH_PASS || k == SHADER_VERSION_DEPTH_PASS_DP || k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL || k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS || k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL)) { +					} else if (uses_depth_pre_pass && (k == SHADER_VERSION_DEPTH_PASS || k == SHADER_VERSION_DEPTH_PASS_DP || k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS || k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL)) {  						if (k == SHADER_VERSION_DEPTH_PASS || k == SHADER_VERSION_DEPTH_PASS_DP) {  							//none, blend state contains nothing  						} else if (k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) { @@ -328,20 +328,23 @@ void RasterizerSceneHighEndRD::ShaderData::set_code(const String &p_code) {  						continue; // do not use this version (will error if using it is attempted)  					}  				} else { -					if (k == SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_VCT_COLOR_PASS || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS) { +					if (k == SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS) {  						blend_state = blend_state_opaque;  					} else if (k == SHADER_VERSION_DEPTH_PASS || k == SHADER_VERSION_DEPTH_PASS_DP) {  						//none, leave empty -					} else if (k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL) { -						blend_state = blend_state_depth_normal;  					} else if (k == SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS) {  						blend_state = blend_state_depth_normal_roughness; +					} else if (k == 
SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE) { +						blend_state = blend_state_depth_normal_roughness_giprobe;  					} else if (k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) {  						blend_state = RD::PipelineColorBlendState::create_disabled(5); //writes to normal and roughness in opaque way - +					} else if (k == SHADER_VERSION_DEPTH_PASS_WITH_SDF) { +						blend_state = RD::PipelineColorBlendState(); //no color targets for SDF  					} else {  						//specular write  						blend_state = blend_state_opaque_specular; +						depth_stencil.enable_depth_test = false; +						depth_stencil.enable_depth_write = false;  					}  				} @@ -607,7 +610,78 @@ void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::ensure_specular() {  	}  } +void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::ensure_gi() { +	if (!reflection_buffer.is_valid()) { +		RD::TextureFormat tf; +		tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; +		tf.width = width; +		tf.height = height; +		tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + +		reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); +		ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); +	} +} + +void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::ensure_giprobe() { +	if (!giprobe_buffer.is_valid()) { +		RD::TextureFormat tf; +		tf.format = RD::DATA_FORMAT_R8G8_UINT; +		tf.width = width; +		tf.height = height; +		tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + +		if (msaa != RS::VIEWPORT_MSAA_DISABLED) { +			RD::TextureFormat tf_aa = tf; +			tf_aa.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; +			tf_aa.samples = texture_samples; +			giprobe_buffer_msaa = RD::get_singleton()->texture_create(tf_aa, RD::TextureView()); +		} else { +			tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; +		} + +		tf.usage_bits |= RD::TEXTURE_USAGE_STORAGE_BIT; + +		giprobe_buffer = RD::get_singleton()->texture_create(tf, 
RD::TextureView()); + +		Vector<RID> fb; +		if (msaa != RS::VIEWPORT_MSAA_DISABLED) { +			fb.push_back(depth_msaa); +			fb.push_back(normal_roughness_buffer_msaa); +			fb.push_back(giprobe_buffer_msaa); +		} else { +			fb.push_back(depth); +			fb.push_back(normal_roughness_buffer); +			fb.push_back(giprobe_buffer); +		} + +		depth_normal_roughness_giprobe_fb = RD::get_singleton()->framebuffer_create(fb); +	} +} +  void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::clear() { +	if (ambient_buffer != RID() && ambient_buffer != color) { +		RD::get_singleton()->free(ambient_buffer); +		ambient_buffer = RID(); +	} + +	if (reflection_buffer != RID() && reflection_buffer != specular) { +		RD::get_singleton()->free(reflection_buffer); +		reflection_buffer = RID(); +	} + +	if (giprobe_buffer != RID()) { +		RD::get_singleton()->free(giprobe_buffer); +		giprobe_buffer = RID(); + +		if (giprobe_buffer_msaa.is_valid()) { +			RD::get_singleton()->free(giprobe_buffer_msaa); +			giprobe_buffer_msaa = RID(); +		} + +		depth_normal_roughness_giprobe_fb = RID(); +	} +  	if (color_msaa.is_valid()) {  		RD::get_singleton()->free(color_msaa);  		color_msaa = RID(); @@ -634,24 +708,18 @@ void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::clear() {  	color_fb = RID();  	depth_fb = RID(); -	if (normal_buffer.is_valid()) { -		RD::get_singleton()->free(normal_buffer); -		if (normal_buffer_msaa.is_valid()) { -			RD::get_singleton()->free(normal_buffer_msaa); -			normal_buffer_msaa = RID(); +	if (normal_roughness_buffer.is_valid()) { +		RD::get_singleton()->free(normal_roughness_buffer); +		if (normal_roughness_buffer_msaa.is_valid()) { +			RD::get_singleton()->free(normal_roughness_buffer_msaa); +			normal_roughness_buffer_msaa = RID();  		} -		normal_buffer = RID(); -		depth_normal_fb = RID(); +		normal_roughness_buffer = RID(); +		depth_normal_roughness_fb = RID();  	} -	if (roughness_buffer.is_valid()) { -		RD::get_singleton()->free(roughness_buffer); -		if 
(roughness_buffer_msaa.is_valid()) { -			RD::get_singleton()->free(roughness_buffer_msaa); -			roughness_buffer_msaa = RID(); -		} -		roughness_buffer = RID(); -		depth_normal_roughness_fb = RID(); +	if (!render_sdfgi_uniform_set.is_null() && RD::get_singleton()->uniform_set_is_valid(render_sdfgi_uniform_set)) { +		RD::get_singleton()->free(render_sdfgi_uniform_set);  	}  } @@ -686,7 +754,7 @@ void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::configure(RID p_color_bu  		tf.width = p_width;  		tf.height = p_height;  		tf.type = RD::TEXTURE_TYPE_2D; -		tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; +		tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT;  		RD::TextureSamples ts[RS::VIEWPORT_MSAA_MAX] = {  			RD::TEXTURE_SAMPLES_1, @@ -702,7 +770,7 @@ void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::configure(RID p_color_bu  		color_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView());  		tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D24_UNORM_S8_UINT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? 
RD::DATA_FORMAT_D24_UNORM_S8_UINT : RD::DATA_FORMAT_D32_SFLOAT_S8_UINT; -		tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; +		tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT;  		depth_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); @@ -722,80 +790,38 @@ void RasterizerSceneHighEndRD::RenderBufferDataHighEnd::configure(RID p_color_bu  	}  } -void RasterizerSceneHighEndRD::_allocate_normal_texture(RenderBufferDataHighEnd *rb) { -	if (rb->normal_buffer.is_valid()) { +void RasterizerSceneHighEndRD::_allocate_normal_roughness_texture(RenderBufferDataHighEnd *rb) { +	if (rb->normal_roughness_buffer.is_valid()) {  		return;  	}  	RD::TextureFormat tf; -	tf.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; +	tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;  	tf.width = rb->width;  	tf.height = rb->height;  	tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT;  	if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { -		tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; -	} else { -		tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; -	} - -	rb->normal_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - -	if (rb->msaa == RS::VIEWPORT_MSAA_DISABLED) { -		Vector<RID> fb; -		fb.push_back(rb->depth); -		fb.push_back(rb->normal_buffer); -		rb->depth_normal_fb = RD::get_singleton()->framebuffer_create(fb); -	} else { -		tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; -		tf.samples = rb->texture_samples; -		rb->normal_buffer_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); - -		Vector<RID> fb; -		fb.push_back(rb->depth_msaa); -		fb.push_back(rb->normal_buffer_msaa); -		rb->depth_normal_fb = RD::get_singleton()->framebuffer_create(fb); -	} - -	_render_buffers_clear_uniform_set(rb); -} - -void 
RasterizerSceneHighEndRD::_allocate_roughness_texture(RenderBufferDataHighEnd *rb) { -	if (rb->roughness_buffer.is_valid()) { -		return; -	} - -	ERR_FAIL_COND(rb->normal_buffer.is_null()); - -	RD::TextureFormat tf; -	tf.format = RD::DATA_FORMAT_R8_UNORM; -	tf.width = rb->width; -	tf.height = rb->height; -	tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - -	if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { -		tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; +		tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT;  	} else {  		tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;  	} -	rb->roughness_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); +	rb->normal_roughness_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView());  	if (rb->msaa == RS::VIEWPORT_MSAA_DISABLED) {  		Vector<RID> fb;  		fb.push_back(rb->depth); -		fb.push_back(rb->normal_buffer); -		fb.push_back(rb->roughness_buffer); +		fb.push_back(rb->normal_roughness_buffer);  		rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb);  	} else { -		tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; +		tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT;  		tf.samples = rb->texture_samples; -		rb->roughness_buffer_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); +		rb->normal_roughness_buffer_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView());  		Vector<RID> fb;  		fb.push_back(rb->depth_msaa); -		fb.push_back(rb->normal_buffer_msaa); -		fb.push_back(rb->roughness_buffer_msaa); +		fb.push_back(rb->normal_roughness_buffer_msaa);  		rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb);  	} @@ -813,7 +839,7 @@ bool RasterizerSceneHighEndRD::free(RID p_rid) {  	return false;  } -void 
RasterizerSceneHighEndRD::_fill_instances(RenderList::Element **p_elements, int p_element_count, bool p_for_depth) { +void RasterizerSceneHighEndRD::_fill_instances(RenderList::Element **p_elements, int p_element_count, bool p_for_depth, bool p_has_sdfgi, bool p_has_opaque_gi) {  	uint32_t lightmap_captures_used = 0;  	for (int i = 0; i < p_element_count; i++) { @@ -855,37 +881,7 @@ void RasterizerSceneHighEndRD::_fill_instances(RenderList::Element **p_elements,  			continue;  		} -		if (!e->instance->gi_probe_instances.empty()) { -			uint32_t written = 0; -			for (int j = 0; j < e->instance->gi_probe_instances.size(); j++) { -				RID probe = e->instance->gi_probe_instances[j]; -				int slot = gi_probe_instance_get_slot(probe); -				if (slot < 0) { -					continue; //unallocated, dont render -				} - -				if (render_pass != gi_probe_instance_get_render_pass(probe)) { -					continue; //not rendered in this frame -				} - -				uint32_t index = gi_probe_instance_get_render_index(probe); - -				if (written == 0) { -					id.gi_offset = index; -					id.flags |= INSTANCE_DATA_FLAG_USE_GIPROBE; -					written = 1; -				} else { -					id.gi_offset = index << 16; -					written = 2; -					break; -				} -			} -			if (written == 0) { -				id.gi_offset = 0xFFFFFFFF; -			} else if (written == 1) { -				id.gi_offset |= 0xFFFF0000; -			} -		} else if (e->instance->lightmap) { +		if (e->instance->lightmap) {  			int32_t lightmap_index = storage->lightmap_get_array_index(e->instance->lightmap->base);  			if (lightmap_index >= 0) {  				id.gi_offset = lightmap_index; @@ -916,8 +912,40 @@ void RasterizerSceneHighEndRD::_fill_instances(RenderList::Element **p_elements,  				id.gi_offset = lightmap_captures_used;  				lightmap_captures_used++;  			} +  		} else { -			id.gi_offset = 0xFFFFFFFF; +			if (p_has_opaque_gi) { +				id.flags |= INSTANCE_DATA_FLAG_USE_GI_BUFFERS; +			} + +			if (!e->instance->gi_probe_instances.empty()) { +				uint32_t written = 0; +				for (int j = 0; j < 
e->instance->gi_probe_instances.size(); j++) { +					RID probe = e->instance->gi_probe_instances[j]; + +					uint32_t index = gi_probe_instance_get_render_index(probe); + +					if (written == 0) { +						id.gi_offset = index; +						id.flags |= INSTANCE_DATA_FLAG_USE_GIPROBE; +						written = 1; +					} else { +						id.gi_offset = index << 16; +						written = 2; +						break; +					} +				} +				if (written == 0) { +					id.gi_offset = 0xFFFFFFFF; +				} else if (written == 1) { +					id.gi_offset |= 0xFFFF0000; +				} +			} else { +				if (p_has_sdfgi && (e->instance->baked_light || e->instance->dynamic_gi)) { +					id.flags |= INSTANCE_DATA_FLAG_USE_SDFGI; +				} +				id.gi_offset = 0xFFFFFFFF; +			}  		}  	} @@ -970,7 +998,7 @@ void RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l  		//find cull variant  		ShaderData::CullVariant cull_variant; -		if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || ((p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) && e->instance->cast_shadows == RS::SHADOW_CASTING_SETTING_DOUBLE_SIDED)) { +		if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || p_pass_mode == PASS_MODE_SDF || ((p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) && e->instance->cast_shadows == RS::SHADOW_CASTING_SETTING_DOUBLE_SIDED)) {  			cull_variant = ShaderData::CULL_VARIANT_DOUBLE_SIDED;  		} else {  			bool mirror = e->instance->mirror; @@ -1016,8 +1044,8 @@ void RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l  			case PASS_MODE_COLOR_TRANSPARENT: {  				if (e->uses_lightmap) {  					shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS; -				} else if (e->uses_vct) { -					shader_version = SHADER_VERSION_VCT_COLOR_PASS; +				} else if (e->uses_forward_gi) { +					shader_version = SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI;  				} else {  					shader_version = SHADER_VERSION_COLOR_PASS;  				} @@ -1025,8 +1053,6 @@ void 
RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l  			case PASS_MODE_COLOR_SPECULAR: {  				if (e->uses_lightmap) {  					shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR; -				} else if (e->uses_vct) { -					shader_version = SHADER_VERSION_VCT_COLOR_PASS_WITH_SEPARATE_SPECULAR;  				} else {  					shader_version = SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR;  				} @@ -1038,15 +1064,18 @@ void RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l  			case PASS_MODE_SHADOW_DP: {  				shader_version = SHADER_VERSION_DEPTH_PASS_DP;  			} break; -			case PASS_MODE_DEPTH_NORMAL: { -				shader_version = SHADER_VERSION_DEPTH_PASS_WITH_NORMAL; -			} break;  			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: {  				shader_version = SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS;  			} break; +			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: { +				shader_version = SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE; +			} break;  			case PASS_MODE_DEPTH_MATERIAL: {  				shader_version = SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL;  			} break; +			case PASS_MODE_SDF: { +				shader_version = SHADER_VERSION_DEPTH_PASS_WITH_SDF; +			} break;  		}  		RenderPipelineVertexFormatCacheRD *pipeline = nullptr; @@ -1134,7 +1163,7 @@ void RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l  	}  } -void RasterizerSceneHighEndRD::_setup_environment(RID p_environment, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) { +void RasterizerSceneHighEndRD::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 
&p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) {  	//CameraMatrix projection = p_cam_projection;  	//projection.flip_y(); // Vulkan and modern APIs use Y-Down  	CameraMatrix correction; @@ -1178,6 +1207,70 @@ void RasterizerSceneHighEndRD::_setup_environment(RID p_environment, const Camer  	//time global variables  	scene_state.ubo.time = time; +	scene_state.ubo.gi_upscale_for_msaa = false; + +	if (p_render_buffers.is_valid()) { +		RenderBufferDataHighEnd *render_buffers = (RenderBufferDataHighEnd *)render_buffers_get_data(p_render_buffers); +		if (render_buffers->msaa != RS::VIEWPORT_MSAA_DISABLED) { +			scene_state.ubo.gi_upscale_for_msaa = true; +		} +	} +#if 0 +	if (p_render_buffers.is_valid() && render_buffers_is_sdfgi_enabled(p_render_buffers)) { + +		scene_state.ubo.sdfgi_cascade_count = render_buffers_get_sdfgi_cascade_count(p_render_buffers); +		scene_state.ubo.sdfgi_probe_axis_size = render_buffers_get_sdfgi_cascade_probe_count(p_render_buffers); +		scene_state.ubo.sdfgi_cascade_probe_size[0] = scene_state.ubo.sdfgi_probe_axis_size - 1; //float version for performance +		scene_state.ubo.sdfgi_cascade_probe_size[1] = scene_state.ubo.sdfgi_probe_axis_size - 1; +		scene_state.ubo.sdfgi_cascade_probe_size[2] = scene_state.ubo.sdfgi_probe_axis_size - 1; + +		float csize = render_buffers_get_sdfgi_cascade_size(p_render_buffers); +		scene_state.ubo.sdfgi_probe_to_uvw = 1.0 / float(scene_state.ubo.sdfgi_cascade_probe_size[0]); +		float occ_bias = 0.0; +		scene_state.ubo.sdfgi_occlusion_bias = occ_bias / csize; +		scene_state.ubo.sdfgi_use_occlusion = render_buffers_is_sdfgi_using_occlusion(p_render_buffers); +		scene_state.ubo.sdfgi_energy = render_buffers_get_sdfgi_energy(p_render_buffers); + +		float cascade_voxel_size = (csize / scene_state.ubo.sdfgi_cascade_probe_size[0]); +		float occlusion_clamp = (cascade_voxel_size - 0.5) / 
cascade_voxel_size; +		scene_state.ubo.sdfgi_occlusion_clamp[0] = occlusion_clamp; +		scene_state.ubo.sdfgi_occlusion_clamp[1] = occlusion_clamp; +		scene_state.ubo.sdfgi_occlusion_clamp[2] = occlusion_clamp; +		scene_state.ubo.sdfgi_normal_bias = (render_buffers_get_sdfgi_normal_bias(p_render_buffers) / csize) * scene_state.ubo.sdfgi_cascade_probe_size[0]; + +		//vec2 tex_pixel_size = 1.0 / vec2(ivec2( (OCT_SIZE+2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE+2) * params.probe_axis_size ) ); +		//vec3 probe_uv_offset = (ivec3(OCT_SIZE+2,OCT_SIZE+2,(OCT_SIZE+2) * params.probe_axis_size)) * tex_pixel_size.xyx; + +		uint32_t oct_size = sdfgi_get_lightprobe_octahedron_size(); + +		scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0] = 1.0 / ((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size * scene_state.ubo.sdfgi_probe_axis_size); +		scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[1] = 1.0 / ((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size); +		scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[2] = 1.0; + +		scene_state.ubo.sdfgi_probe_uv_offset[0] = float(oct_size + 2) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0]; +		scene_state.ubo.sdfgi_probe_uv_offset[1] = float(oct_size + 2) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[1]; +		scene_state.ubo.sdfgi_probe_uv_offset[2] = float((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0]; + +		scene_state.ubo.sdfgi_occlusion_renormalize[0] = 0.5; +		scene_state.ubo.sdfgi_occlusion_renormalize[1] = 1.0; +		scene_state.ubo.sdfgi_occlusion_renormalize[2] = 1.0 / float(scene_state.ubo.sdfgi_cascade_count); + +		for (uint32_t i = 0; i < scene_state.ubo.sdfgi_cascade_count; i++) { +			SceneState::UBO::SDFGICascade &c = scene_state.ubo.sdfgi_cascades[i]; +			Vector3 pos = render_buffers_get_sdfgi_cascade_offset(p_render_buffers, i); +			pos -= p_cam_transform.origin; //make pos local to camera, to reduce numerical error +			c.position[0] = 
pos.x; +			c.position[1] = pos.y; +			c.position[2] = pos.z; +			c.to_probe = 1.0 / render_buffers_get_sdfgi_cascade_probe_size(p_render_buffers, i); + +			Vector3i probe_ofs = render_buffers_get_sdfgi_cascade_probe_offset(p_render_buffers, i); +			c.probe_world_offset[0] = probe_ofs.x; +			c.probe_world_offset[1] = probe_ofs.y; +			c.probe_world_offset[2] = probe_ofs.z; +		} +	} +#endif  	if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) {  		scene_state.ubo.use_ambient_light = true;  		scene_state.ubo.ambient_light_color_energy[0] = 1; @@ -1260,11 +1353,13 @@ void RasterizerSceneHighEndRD::_setup_environment(RID p_environment, const Camer  	}  	scene_state.ubo.roughness_limiter_enabled = p_opaque_render_buffers && screen_space_roughness_limiter_is_active(); +	scene_state.ubo.roughness_limiter_amount = screen_space_roughness_limiter_get_amount(); +	scene_state.ubo.roughness_limiter_limit = screen_space_roughness_limiter_get_limit();  	RD::get_singleton()->buffer_update(scene_state.uniform_buffer, 0, sizeof(SceneState::UBO), &scene_state.ubo, true);  } -void RasterizerSceneHighEndRD::_add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index) { +void RasterizerSceneHighEndRD::_add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi) {  	RID m_src;  	m_src = p_instance->material_override.is_valid() ? 
p_instance->material_override : p_material; @@ -1293,18 +1388,18 @@ void RasterizerSceneHighEndRD::_add_geometry(InstanceBase *p_instance, uint32_t  	ERR_FAIL_COND(!material); -	_add_geometry_with_material(p_instance, p_surface, material, m_src, p_pass_mode, p_geometry_index); +	_add_geometry_with_material(p_instance, p_surface, material, m_src, p_pass_mode, p_geometry_index, p_using_sdfgi);  	while (material->next_pass.is_valid()) {  		material = (MaterialData *)storage->material_get_data(material->next_pass, RasterizerStorageRD::SHADER_TYPE_3D);  		if (!material || !material->shader_data->valid) {  			break;  		} -		_add_geometry_with_material(p_instance, p_surface, material, material->next_pass, p_pass_mode, p_geometry_index); +		_add_geometry_with_material(p_instance, p_surface, material, material->next_pass, p_pass_mode, p_geometry_index, p_using_sdfgi);  	}  } -void RasterizerSceneHighEndRD::_add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode p_pass_mode, uint32_t p_geometry_index) { +void RasterizerSceneHighEndRD::_add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi) {  	bool has_read_screen_alpha = p_material->shader_data->uses_screen_texture || p_material->shader_data->uses_depth_texture || p_material->shader_data->uses_normal_texture;  	bool has_base_alpha = (p_material->shader_data->uses_alpha || has_read_screen_alpha);  	bool has_blend_alpha = p_material->shader_data->uses_blend_alpha; @@ -1332,13 +1427,11 @@ void RasterizerSceneHighEndRD::_add_geometry_with_material(InstanceBase *p_insta  			return;  		} -		if (p_pass_mode != PASS_MODE_DEPTH_MATERIAL && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_pre_pass) { +	
	if ((p_pass_mode != PASS_MODE_DEPTH_MATERIAL && p_pass_mode != PASS_MODE_SDF) && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_pre_pass) {  			//shader does not use discard and does not write a vertex position, use generic material  			if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_DEPTH) {  				p_material = (MaterialData *)storage->material_get_data(default_material, RasterizerStorageRD::SHADER_TYPE_3D); -			} else if (p_pass_mode == PASS_MODE_DEPTH_NORMAL && !p_material->shader_data->uses_normal) { -				p_material = (MaterialData *)storage->material_get_data(default_material, RasterizerStorageRD::SHADER_TYPE_3D); -			} else if (p_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS && !p_material->shader_data->uses_normal && !p_material->shader_data->uses_roughness) { +			} else if ((p_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || p_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE) && !p_material->shader_data->uses_normal && !p_material->shader_data->uses_roughness) {  				p_material = (MaterialData *)storage->material_get_data(default_material, RasterizerStorageRD::SHADER_TYPE_3D);  			}  		} @@ -1346,7 +1439,9 @@ void RasterizerSceneHighEndRD::_add_geometry_with_material(InstanceBase *p_insta  		has_alpha = false;  	} -	RenderList::Element *e = (has_alpha || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED) ? render_list.add_alpha_element() : render_list.add_element(); +	has_alpha = has_alpha || p_material->shader_data->depth_test == ShaderData::DEPTH_TEST_DISABLED; + +	RenderList::Element *e = has_alpha ? 
render_list.add_alpha_element() : render_list.add_element();  	if (!e) {  		return; @@ -1373,7 +1468,7 @@ void RasterizerSceneHighEndRD::_add_geometry_with_material(InstanceBase *p_insta  	e->material_index = e->material->index;  	e->uses_instancing = e->instance->base_type == RS::INSTANCE_MULTIMESH;  	e->uses_lightmap = e->instance->lightmap != nullptr || !e->instance->lightmap_sh.empty(); -	e->uses_vct = e->instance->gi_probe_instances.size(); +	e->uses_forward_gi = has_alpha && (e->instance->gi_probe_instances.size() || p_using_sdfgi);  	e->shader_index = e->shader_index;  	e->depth_layer = e->instance->depth_layer;  	e->priority = p_material->priority; @@ -1383,7 +1478,7 @@ void RasterizerSceneHighEndRD::_add_geometry_with_material(InstanceBase *p_insta  	}  } -void RasterizerSceneHighEndRD::_fill_render_list(InstanceBase **p_cull_result, int p_cull_count, PassMode p_pass_mode, bool p_no_gi) { +void RasterizerSceneHighEndRD::_fill_render_list(InstanceBase **p_cull_result, int p_cull_count, PassMode p_pass_mode, bool p_using_sdfgi) {  	scene_state.current_shader_index = 0;  	scene_state.current_material_index = 0;  	scene_state.used_sss = false; @@ -1415,7 +1510,7 @@ void RasterizerSceneHighEndRD::_fill_render_list(InstanceBase **p_cull_result, i  					RID material = inst_materials[j].is_valid() ? 
inst_materials[j] : materials[j];  					uint32_t surface_index = storage->mesh_surface_get_render_pass_index(inst->base, j, render_pass, &geometry_index); -					_add_geometry(inst, j, material, p_pass_mode, surface_index); +					_add_geometry(inst, j, material, p_pass_mode, surface_index, p_using_sdfgi);  				}  				//mesh->last_pass=frame; @@ -1443,7 +1538,7 @@ void RasterizerSceneHighEndRD::_fill_render_list(InstanceBase **p_cull_result, i  				for (uint32_t j = 0; j < surface_count; j++) {  					uint32_t surface_index = storage->mesh_surface_get_multimesh_render_pass_index(mesh, j, render_pass, &geometry_index); -					_add_geometry(inst, j, materials[j], p_pass_mode, surface_index); +					_add_geometry(inst, j, materials[j], p_pass_mode, surface_index, p_using_sdfgi);  				}  			} break; @@ -1525,30 +1620,13 @@ void RasterizerSceneHighEndRD::_setup_reflections(RID *p_reflection_probe_cull_r  		reflection_ubo.params[2] = interior ? 1.0 : 0.0;  		reflection_ubo.params[3] = box_projection ? 
1.0 : 0.0; -		if (interior) { -			Color ambient_linear = storage->reflection_probe_get_interior_ambient(base_probe).to_linear(); -			float interior_ambient_energy = storage->reflection_probe_get_interior_ambient_energy(base_probe); -			float interior_ambient_probe_contrib = storage->reflection_probe_get_interior_ambient_probe_contribution(base_probe); -			reflection_ubo.ambient[0] = ambient_linear.r * interior_ambient_energy; -			reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy; -			reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy; -			reflection_ubo.ambient[3] = interior_ambient_probe_contrib; -		} else { -			Color ambient_linear = storage->reflection_probe_get_interior_ambient(base_probe).to_linear(); -			if (is_environment(p_environment)) { -				Color env_ambient_color = environment_get_ambient_light_color(p_environment).to_linear(); -				float env_ambient_energy = environment_get_ambient_light_energy(p_environment); -				ambient_linear = env_ambient_color; -				ambient_linear.r *= env_ambient_energy; -				ambient_linear.g *= env_ambient_energy; -				ambient_linear.b *= env_ambient_energy; -			} - -			reflection_ubo.ambient[0] = ambient_linear.r; -			reflection_ubo.ambient[1] = ambient_linear.g; -			reflection_ubo.ambient[2] = ambient_linear.b; -			reflection_ubo.ambient[3] = 0; //not used in exterior mode, since it just blends with regular ambient light -		} +		Color ambient_linear = storage->reflection_probe_get_ambient_color(base_probe).to_linear(); +		float interior_ambient_energy = storage->reflection_probe_get_ambient_color_energy(base_probe); +		uint32_t ambient_mode = storage->reflection_probe_get_ambient_mode(base_probe); +		reflection_ubo.ambient[0] = ambient_linear.r * interior_ambient_energy; +		reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy; +		reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy; +		reflection_ubo.ambient_mode = ambient_mode;  		Transform 
transform = reflection_probe_instance_get_transform(rpi);  		Transform proj = (p_camera_inverse_transform * transform).inverse(); @@ -1583,59 +1661,6 @@ void RasterizerSceneHighEndRD::_setup_lightmaps(InstanceBase **p_lightmap_cull_r  	}  } -void RasterizerSceneHighEndRD::_setup_gi_probes(RID *p_gi_probe_probe_cull_result, int p_gi_probe_probe_cull_count, const Transform &p_camera_transform) { -	int index = 0; - -	for (int i = 0; i < p_gi_probe_probe_cull_count; i++) { -		RID rpi = p_gi_probe_probe_cull_result[i]; - -		if (index >= (int)scene_state.max_gi_probes) { -			continue; -		} - -		int slot = gi_probe_instance_get_slot(rpi); -		if (slot < 0) { -			continue; //not usable -		} - -		RID base_probe = gi_probe_instance_get_base_probe(rpi); - -		GIProbeData &gi_probe_ubo = scene_state.gi_probes[index]; - -		Transform to_cell = gi_probe_instance_get_transform_to_cell(rpi) * p_camera_transform; - -		store_transform(to_cell, gi_probe_ubo.xform); - -		Vector3 bounds = storage->gi_probe_get_octree_size(base_probe); - -		gi_probe_ubo.bounds[0] = bounds.x; -		gi_probe_ubo.bounds[1] = bounds.y; -		gi_probe_ubo.bounds[2] = bounds.z; - -		gi_probe_ubo.dynamic_range = storage->gi_probe_get_dynamic_range(base_probe) * storage->gi_probe_get_energy(base_probe); -		gi_probe_ubo.bias = storage->gi_probe_get_bias(base_probe); -		gi_probe_ubo.normal_bias = storage->gi_probe_get_normal_bias(base_probe); -		gi_probe_ubo.blend_ambient = !storage->gi_probe_is_interior(base_probe); -		gi_probe_ubo.texture_slot = gi_probe_instance_get_slot(rpi); -		gi_probe_ubo.anisotropy_strength = storage->gi_probe_get_anisotropy_strength(base_probe); -		gi_probe_ubo.ao = storage->gi_probe_get_ao(base_probe); -		gi_probe_ubo.ao_size = Math::pow(storage->gi_probe_get_ao_size(base_probe), 4.0f); - -		if (gi_probe_is_anisotropic()) { -			gi_probe_ubo.texture_slot *= 3; -		} - -		gi_probe_instance_set_render_index(rpi, index); -		gi_probe_instance_set_render_pass(rpi, render_pass); - -		index++; -	} - -	if 
(index) { -		RD::get_singleton()->buffer_update(scene_state.gi_probe_buffer, 0, index * sizeof(GIProbeData), scene_state.gi_probes, true); -	} -} -  void RasterizerSceneHighEndRD::_setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows) {  	uint32_t light_count = 0;  	scene_state.ubo.directional_light_count = 0; @@ -2156,6 +2181,8 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  	Vector<Color> depth_pass_clear;  	bool using_separate_specular = false;  	bool using_ssr = false; +	bool using_sdfgi = false; +	bool using_giprobe = false;  	if (render_buffer) {  		screen_pixel_size.width = 1.0 / render_buffer->width; @@ -2165,43 +2192,55 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  		opaque_framebuffer = render_buffer->color_fb; -		if (p_environment.is_valid() && environment_is_ssr_enabled(p_environment)) { -			depth_pass_mode = PASS_MODE_DEPTH_NORMAL_ROUGHNESS; -			render_buffer->ensure_specular(); -			using_separate_specular = true; -			using_ssr = true; -			opaque_specular_framebuffer = render_buffer->color_specular_fb; -		} else if (screen_space_roughness_limiter_is_active()) { -			depth_pass_mode = PASS_MODE_DEPTH_NORMAL; -			//we need to allocate both these, if not allocated -			_allocate_normal_texture(render_buffer); -			_allocate_roughness_texture(render_buffer); +		if (p_gi_probe_cull_count > 0) { +			using_giprobe = true; +			render_buffer->ensure_gi(); +		} + +		if (!p_environment.is_valid() && using_giprobe) { +			depth_pass_mode = PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE; + +		} else if (p_environment.is_valid() && (environment_is_ssr_enabled(p_environment) || environment_is_sdfgi_enabled(p_environment) || using_giprobe)) { +			if (environment_is_sdfgi_enabled(p_environment)) { +				depth_pass_mode = using_giprobe ? 
PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE : PASS_MODE_DEPTH_NORMAL_ROUGHNESS; // also giprobe +				using_sdfgi = true; +				render_buffer->ensure_gi(); +			} else { +				depth_pass_mode = using_giprobe ? PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE : PASS_MODE_DEPTH_NORMAL_ROUGHNESS; +			} + +			if (environment_is_ssr_enabled(p_environment)) { +				render_buffer->ensure_specular(); +				using_separate_specular = true; +				using_ssr = true; +				opaque_specular_framebuffer = render_buffer->color_specular_fb; +			} +  		} else if (p_environment.is_valid() && (environment_is_ssao_enabled(p_environment) || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER)) { -			depth_pass_mode = PASS_MODE_DEPTH_NORMAL; +			depth_pass_mode = PASS_MODE_DEPTH_NORMAL_ROUGHNESS;  		}  		switch (depth_pass_mode) {  			case PASS_MODE_DEPTH: {  				depth_framebuffer = render_buffer->depth_fb;  			} break; -			case PASS_MODE_DEPTH_NORMAL: { -				_allocate_normal_texture(render_buffer); -				depth_framebuffer = render_buffer->depth_normal_fb; -				depth_pass_clear.push_back(Color(0.5, 0.5, 0.5, 0)); -			} break;  			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { -				_allocate_normal_texture(render_buffer); -				_allocate_roughness_texture(render_buffer); +				_allocate_normal_roughness_texture(render_buffer);  				depth_framebuffer = render_buffer->depth_normal_roughness_fb;  				depth_pass_clear.push_back(Color(0.5, 0.5, 0.5, 0)); -				depth_pass_clear.push_back(Color()); +			} break; +			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: { +				_allocate_normal_roughness_texture(render_buffer); +				render_buffer->ensure_giprobe(); +				depth_framebuffer = render_buffer->depth_normal_roughness_giprobe_fb; +				depth_pass_clear.push_back(Color(0.5, 0.5, 0.5, 0)); +				depth_pass_clear.push_back(Color(0, 0, 0, 0));  			} break;  			default: {  			};  		}  		alpha_framebuffer = opaque_framebuffer; -  	} else if (p_reflection_probe.is_valid()) {  		uint32_t resolution = 
reflection_probe_instance_get_resolution(p_reflection_probe);  		screen_pixel_size.width = 1.0 / resolution; @@ -2216,7 +2255,6 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  		if (storage->reflection_probe_is_interior(reflection_probe_instance_get_probe(p_reflection_probe))) {  			p_environment = RID(); //no environment on interiors  		} -  	} else {  		ERR_FAIL(); //bug?  	} @@ -2226,16 +2264,15 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  	_setup_lights(p_light_cull_result, p_light_cull_count, p_cam_transform.affine_inverse(), p_shadow_atlas, using_shadows);  	_setup_decals(p_decal_cull_result, p_decal_cull_count, p_cam_transform.affine_inverse());  	_setup_reflections(p_reflection_probe_cull_result, p_reflection_probe_cull_count, p_cam_transform.affine_inverse(), p_environment); -	_setup_gi_probes(p_gi_probe_cull_result, p_gi_probe_cull_count, p_cam_transform);  	_setup_lightmaps(p_lightmap_cull_result, p_lightmap_cull_count, p_cam_transform); -	_setup_environment(p_environment, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); +	_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);  	cluster_builder.bake_cluster(); //bake to cluster  	_update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example)  	render_list.clear(); -	_fill_render_list(p_cull_result, p_cull_count, PASS_MODE_COLOR, render_buffer == nullptr); +	_fill_render_list(p_cull_result, p_cull_count, PASS_MODE_COLOR, using_sdfgi);  	bool using_sss = 
render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED; @@ -2307,47 +2344,46 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  		clear_color = p_default_bg_color;  	} -	_setup_view_dependant_uniform_set(p_shadow_atlas, p_reflection_atlas); +	_setup_view_dependant_uniform_set(p_shadow_atlas, p_reflection_atlas, p_gi_probe_cull_result, p_gi_probe_cull_count);  	render_list.sort_by_key(false); -	_fill_instances(render_list.elements, render_list.element_count, false); +	_fill_instances(render_list.elements, render_list.element_count, false, false, using_sdfgi || using_giprobe);  	bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION; +	bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES;  	bool depth_pre_pass = depth_framebuffer.is_valid();  	RID render_buffers_uniform_set;  	bool using_ssao = depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment); - +	bool continue_depth = false;  	if (depth_pre_pass) { //depth pre pass  		RENDER_TIMESTAMP("Render Depth Pre-Pass"); -		bool finish_depth = using_ssao; +		bool finish_depth = using_ssao || using_sdfgi || using_giprobe;  		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? 
RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear);  		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(depth_framebuffer), render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, radiance_uniform_set, RID(), get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME);  		RD::get_singleton()->draw_list_end();  		if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { -			if (finish_depth) { +			if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE) { +				static int texture_samples[RS::VIEWPORT_MSAA_MAX] = { 1, 2, 4, 8, 16 }; +				storage->get_effects()->resolve_gi(render_buffer->depth_msaa, render_buffer->normal_roughness_buffer_msaa, using_giprobe ? render_buffer->giprobe_buffer_msaa : RID(), render_buffer->depth, render_buffer->normal_roughness_buffer, using_giprobe ? render_buffer->giprobe_buffer : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_samples[render_buffer->msaa]); +			} else if (finish_depth) {  				RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth, true);  			} - -			if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL || depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS) { -				RD::get_singleton()->texture_resolve_multisample(render_buffer->normal_buffer_msaa, render_buffer->normal_buffer, true); -				if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS) { -					RD::get_singleton()->texture_resolve_multisample(render_buffer->roughness_buffer_msaa, render_buffer->roughness_buffer, true); -				} -			}  		} + +		continue_depth = !finish_depth;  	}  	if (using_ssao) { -		_process_ssao(p_render_buffer, p_environment, render_buffer->normal_buffer, p_cam_projection); +		_process_ssao(p_render_buffer, p_environment, render_buffer->normal_roughness_buffer, p_cam_projection);  	} -	if (p_render_buffer.is_valid() && 
screen_space_roughness_limiter_is_active()) { -		storage->get_effects()->roughness_limit(render_buffer->normal_buffer, render_buffer->roughness_buffer, Size2(render_buffer->width, render_buffer->height), screen_space_roughness_limiter_get_curve()); +	if (using_sdfgi || using_giprobe) { +		_process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->ambient_buffer, render_buffer->reflection_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probe_cull_result, p_gi_probe_cull_count);  	}  	if (p_render_buffer.is_valid()) { @@ -2356,7 +2392,7 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  		render_buffers_uniform_set = render_buffer->uniform_set;  	} -	_setup_environment(p_environment, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); +	_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid());  	RENDER_TIMESTAMP("Render Opaque Pass"); @@ -2364,8 +2400,8 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  	bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss;  	{ -		bool will_continue_color = (can_continue_color || draw_sky || debug_giprobes); -		bool will_continue_depth = (can_continue_depth || draw_sky || debug_giprobes); +		bool will_continue_color = (can_continue_color || draw_sky || debug_giprobes || debug_sdfgi_probes); +		bool will_continue_depth = (can_continue_depth || draw_sky || debug_giprobes || debug_sdfgi_probes);  		//regular 
forward for now  		Vector<Color> c; @@ -2379,7 +2415,7 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  		}  		RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer; -		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (using_ssao ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); +		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);  		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(framebuffer), render_list.elements, render_list.element_count, false, using_separate_specular ? 
PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, radiance_uniform_set, render_buffers_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME);  		RD::get_singleton()->draw_list_end(); @@ -2405,6 +2441,19 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  		RD::get_singleton()->draw_list_end();  	} +	if (debug_sdfgi_probes) { +		//debug giprobes +		bool will_continue_color = (can_continue_color || draw_sky); +		bool will_continue_depth = (can_continue_depth || draw_sky); + +		CameraMatrix dc; +		dc.set_depth_correction(true); +		CameraMatrix cm = (dc * p_cam_projection) * CameraMatrix(p_cam_transform.affine_inverse()); +		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(opaque_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); +		_debug_sdfgi_probes(p_render_buffer, draw_list, opaque_framebuffer, cm); +		RD::get_singleton()->draw_list_end(); +	} +  	if (draw_sky) {  		RENDER_TIMESTAMP("Render Sky"); @@ -2437,7 +2486,7 @@ void RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  		if (using_ssr) {  			RENDER_TIMESTAMP("Screen Space Reflection"); -			_process_ssr(p_render_buffer, render_buffer->color_fb, render_buffer->normal_buffer, render_buffer->roughness_buffer, render_buffer->specular, render_buffer->specular, Color(0, 0, 0, 1), p_environment, p_cam_projection, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED); +			_process_ssr(p_render_buffer, render_buffer->color_fb, render_buffer->normal_roughness_buffer, render_buffer->specular, render_buffer->specular, Color(0, 0, 0, 1), p_environment, p_cam_projection, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED);  		} else {  			//just mix specular back  			RENDER_TIMESTAMP("Merge Specular"); @@ -2447,11 +2496,11 @@ void 
RasterizerSceneHighEndRD::_render_scene(RID p_render_buffer, const Transfor  	RENDER_TIMESTAMP("Render Transparent Pass"); -	_setup_environment(p_environment, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); +	_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);  	render_list.sort_by_reverse_depth_and_priority(true); -	_fill_instances(&render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false); +	_fill_instances(&render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, using_sdfgi);  	{  		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); @@ -2473,15 +2522,15 @@ void RasterizerSceneHighEndRD::_render_shadow(RID p_framebuffer, InstanceBase **  	scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1; -	_setup_environment(RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_zfar, false, p_use_pancake); +	_setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_zfar, false, p_use_pancake);  	render_list.clear();  	PassMode pass_mode = p_use_dp ? 
PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW; -	_fill_render_list(p_cull_result, p_cull_count, pass_mode, true); +	_fill_render_list(p_cull_result, p_cull_count, pass_mode); -	_setup_view_dependant_uniform_set(RID(), RID()); +	_setup_view_dependant_uniform_set(RID(), RID(), nullptr, 0);  	RENDER_TIMESTAMP("Render Shadow"); @@ -2507,14 +2556,14 @@ void RasterizerSceneHighEndRD::_render_material(const Transform &p_cam_transform  	scene_state.ubo.dual_paraboloid_side = 0;  	scene_state.ubo.material_uv2_mode = true; -	_setup_environment(RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); +	_setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);  	render_list.clear();  	PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; -	_fill_render_list(p_cull_result, p_cull_count, pass_mode, true); +	_fill_render_list(p_cull_result, p_cull_count, pass_mode); -	_setup_view_dependant_uniform_set(RID(), RID()); +	_setup_view_dependant_uniform_set(RID(), RID(), nullptr, 0);  	RENDER_TIMESTAMP("Render Material"); @@ -2546,14 +2595,14 @@ void RasterizerSceneHighEndRD::_render_uv2(InstanceBase **p_cull_result, int p_c  	scene_state.ubo.dual_paraboloid_side = 0;  	scene_state.ubo.material_uv2_mode = true; -	_setup_environment(RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); +	_setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);  	render_list.clear();  	PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; -	_fill_render_list(p_cull_result, p_cull_count, pass_mode, true); +	_fill_render_list(p_cull_result, p_cull_count, pass_mode); -	_setup_view_dependant_uniform_set(RID(), RID()); +	_setup_view_dependant_uniform_set(RID(), RID(), nullptr, 0);  	RENDER_TIMESTAMP("Render Material"); @@ -2597,6 +2646,121 @@ void RasterizerSceneHighEndRD::_render_uv2(InstanceBase **p_cull_result, int 
p_c  	}  } +void RasterizerSceneHighEndRD::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, InstanceBase **p_cull_result, int p_cull_count, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { +	RENDER_TIMESTAMP("Render SDFGI"); + +	_update_render_base_uniform_set(); + +	RenderBufferDataHighEnd *render_buffer = (RenderBufferDataHighEnd *)render_buffers_get_data(p_render_buffers); +	ERR_FAIL_COND(!render_buffer); + +	render_pass++; +	render_list.clear(); + +	PassMode pass_mode = PASS_MODE_SDF; +	_fill_render_list(p_cull_result, p_cull_count, pass_mode); +	render_list.sort_by_key(false); +	_fill_instances(render_list.elements, render_list.element_count, true); + +	_setup_view_dependant_uniform_set(RID(), RID(), nullptr, 0); + +	Vector3 half_extents = p_bounds.size * 0.5; +	Vector3 center = p_bounds.position + half_extents; + +	if (render_buffer->render_sdfgi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(render_buffer->render_sdfgi_uniform_set)) { +		Vector<RD::Uniform> uniforms; +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_IMAGE; +			u.binding = 0; +			u.ids.push_back(p_albedo_texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_IMAGE; +			u.binding = 1; +			u.ids.push_back(p_emission_texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_IMAGE; +			u.binding = 2; +			u.ids.push_back(p_emission_aniso_texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_IMAGE; +			u.binding = 3; +			u.ids.push_back(p_geom_facing_texture); +			uniforms.push_back(u); +		} + +		render_buffer->render_sdfgi_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_sdfgi_rd, RENDER_BUFFERS_UNIFORM_SET); +	} + +	Vector<RID> sbs; +	sbs.push_back(p_albedo_texture); +	
sbs.push_back(p_emission_texture); +	sbs.push_back(p_emission_aniso_texture); +	sbs.push_back(p_geom_facing_texture); + +	//print_line("re-render " + p_from + " - " + p_size + " bounds " + p_bounds); +	for (int i = 0; i < 3; i++) { +		scene_state.ubo.sdf_offset[i] = p_from[i]; +		scene_state.ubo.sdf_size[i] = p_size[i]; +	} + +	for (int i = 0; i < 3; i++) { +		Vector3 axis; +		axis[i] = 1.0; +		Vector3 up, right; +		int right_axis = (i + 1) % 3; +		int up_axis = (i + 2) % 3; +		up[up_axis] = 1.0; +		right[right_axis] = 1.0; + +		Size2i fb_size; +		fb_size.x = p_size[right_axis]; +		fb_size.y = p_size[up_axis]; + +		Transform cam_xform; +		cam_xform.origin = center + axis * half_extents; +		cam_xform.basis.set_axis(0, right); +		cam_xform.basis.set_axis(1, up); +		cam_xform.basis.set_axis(2, axis); + +		//print_line("pass: " + itos(i) + " xform " + cam_xform); + +		float h_size = half_extents[right_axis]; +		float v_size = half_extents[up_axis]; +		float d_size = half_extents[i] * 2.0; +		CameraMatrix camera_proj; +		camera_proj.set_orthogonal(-h_size, h_size, -v_size, v_size, 0, d_size); +		//print_line("pass: " + itos(i) + " cam hsize: " + rtos(h_size) + " vsize: " + rtos(v_size) + " dsize " + rtos(d_size)); + +		Transform to_bounds; +		to_bounds.origin = p_bounds.position; +		to_bounds.basis.scale(p_bounds.size); + +		store_transform(to_bounds.affine_inverse() * cam_xform, scene_state.ubo.sdf_to_bounds); + +		_setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0); + +		Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size); +		if (!E) { +			RID fb = RD::get_singleton()->framebuffer_create_empty(fb_size); +			E = sdfgi_framebuffer_size_cache.insert(fb_size, fb); +		} + +		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs); +		
_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(E->get()), render_list.elements, render_list.element_count, true, pass_mode, true, RID(), render_buffer->render_sdfgi_uniform_set, false); //second regular triangles +		RD::get_singleton()->draw_list_end(); +	} +} +  void RasterizerSceneHighEndRD::_base_uniforms_changed() {  	if (!render_base_uniform_set.is_null() && RD::get_singleton()->uniform_set_is_valid(render_base_uniform_set)) {  		RD::get_singleton()->free(render_base_uniform_set); @@ -2682,49 +2846,6 @@ void RasterizerSceneHighEndRD::_update_render_base_uniform_set() {  		}  		{  			RD::Uniform u; -			u.binding = 8; -			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; -			u.ids.push_back(scene_state.gi_probe_buffer); -			uniforms.push_back(u); -		} -		{ -			RD::Uniform u; -			u.binding = 9; -			u.type = RD::UNIFORM_TYPE_TEXTURE; -			int slot_count = gi_probe_get_slots().size(); -			if (gi_probe_is_anisotropic()) { -				u.ids.resize(slot_count * 3); -			} else { -				u.ids.resize(slot_count); -			} - -			for (int i = 0; i < slot_count; i++) { -				RID probe = gi_probe_get_slots()[i]; - -				if (gi_probe_is_anisotropic()) { -					if (probe.is_null()) { -						RID empty_tex = storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); -						u.ids.write[i * 3 + 0] = empty_tex; -						u.ids.write[i * 3 + 1] = empty_tex; -						u.ids.write[i * 3 + 2] = empty_tex; -					} else { -						u.ids.write[i * 3 + 0] = gi_probe_instance_get_texture(probe); -						u.ids.write[i * 3 + 1] = gi_probe_instance_get_aniso_texture(probe, 0); -						u.ids.write[i * 3 + 2] = gi_probe_instance_get_aniso_texture(probe, 1); -					} -				} else { -					if (probe.is_null()) { -						u.ids.write[i] = storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); -					} else { -						u.ids.write[i] = gi_probe_instance_get_texture(probe); -					} -				} -			} - -			uniforms.push_back(u); -		} -		{ -			RD::Uniform u;  			u.binding = 10; 
 			u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER;  			u.ids.push_back(scene_state.lightmap_buffer); @@ -2803,11 +2924,19 @@ void RasterizerSceneHighEndRD::_update_render_base_uniform_set() {  			uniforms.push_back(u);  		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +			u.binding = 20; +			u.ids.push_back(sdfgi_get_ubo()); +			uniforms.push_back(u); +		} +  		render_base_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, SCENE_UNIFORM_SET);  	}  } -void RasterizerSceneHighEndRD::_setup_view_dependant_uniform_set(RID p_shadow_atlas, RID p_reflection_atlas) { +void RasterizerSceneHighEndRD::_setup_view_dependant_uniform_set(RID p_shadow_atlas, RID p_reflection_atlas, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count) {  	if (view_dependant_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(view_dependant_uniform_set)) {  		RD::get_singleton()->free(view_dependant_uniform_set);  	} @@ -2844,6 +2973,25 @@ void RasterizerSceneHighEndRD::_setup_view_dependant_uniform_set(RID p_shadow_at  		uniforms.push_back(u);  	} +	{ +		RD::Uniform u; +		u.binding = 2; +		u.type = RD::UNIFORM_TYPE_TEXTURE; +		RID default_tex = storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); +		for (int i = 0; i < MAX_GI_PROBES; i++) { +			if (i < p_gi_probe_cull_count) { +				RID tex = gi_probe_instance_get_texture(p_gi_probe_cull_result[i]); +				if (!tex.is_valid()) { +					tex = default_tex; +				} +				u.ids.push_back(tex); +			} else { +				u.ids.push_back(default_tex); +			} +		} + +		uniforms.push_back(u); +	}  	view_dependant_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, VIEW_DEPENDANT_UNIFORM_SET);  } @@ -2860,16 +3008,22 @@ void RasterizerSceneHighEndRD::_render_buffers_uniform_set_changed(RID p_render_  	_render_buffers_clear_uniform_set(rb);  } -RID RasterizerSceneHighEndRD::_render_buffers_get_roughness_texture(RID p_render_buffers) { +RID 
RasterizerSceneHighEndRD::_render_buffers_get_normal_texture(RID p_render_buffers) {  	RenderBufferDataHighEnd *rb = (RenderBufferDataHighEnd *)render_buffers_get_data(p_render_buffers); -	return rb->roughness_buffer; +	return rb->normal_roughness_buffer;  } -RID RasterizerSceneHighEndRD::_render_buffers_get_normal_texture(RID p_render_buffers) { +RID RasterizerSceneHighEndRD::_render_buffers_get_ambient_texture(RID p_render_buffers) {  	RenderBufferDataHighEnd *rb = (RenderBufferDataHighEnd *)render_buffers_get_data(p_render_buffers); -	return rb->normal_buffer; +	return rb->ambient_buffer; +} + +RID RasterizerSceneHighEndRD::_render_buffers_get_reflection_texture(RID p_render_buffers) { +	RenderBufferDataHighEnd *rb = (RenderBufferDataHighEnd *)render_buffers_get_data(p_render_buffers); + +	return rb->reflection_buffer;  }  void RasterizerSceneHighEndRD::_update_render_buffers_uniform_set(RID p_render_buffers) { @@ -2898,30 +3052,70 @@ void RasterizerSceneHighEndRD::_update_render_buffers_uniform_set(RID p_render_b  			RD::Uniform u;  			u.binding = 2;  			u.type = RD::UNIFORM_TYPE_TEXTURE; -			RID texture = rb->normal_buffer.is_valid() ? rb->normal_buffer : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_NORMAL); +			RID texture = rb->normal_roughness_buffer.is_valid() ? rb->normal_roughness_buffer : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_NORMAL);  			u.ids.push_back(texture);  			uniforms.push_back(u);  		}  		{  			RD::Uniform u; -			u.binding = 3; +			u.binding = 4;  			u.type = RD::UNIFORM_TYPE_TEXTURE; -			RID texture = rb->roughness_buffer.is_valid() ? rb->roughness_buffer : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_BLACK); +			RID aot = render_buffers_get_ao_texture(p_render_buffers); +			RID texture = aot.is_valid() ? 
aot : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_BLACK);  			u.ids.push_back(texture);  			uniforms.push_back(u);  		}  		{  			RD::Uniform u; -			u.binding = 4; +			u.binding = 5;  			u.type = RD::UNIFORM_TYPE_TEXTURE; -			RID aot = render_buffers_get_ao_texture(p_render_buffers); -			RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_BLACK); +			RID texture = rb->ambient_buffer.is_valid() ? rb->ambient_buffer : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_BLACK);  			u.ids.push_back(texture);  			uniforms.push_back(u);  		} +		{ +			RD::Uniform u; +			u.binding = 6; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			RID texture = rb->reflection_buffer.is_valid() ? rb->reflection_buffer : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_BLACK); +			u.ids.push_back(texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 7; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			RID t; +			if (render_buffers_is_sdfgi_enabled(p_render_buffers)) { +				t = render_buffers_get_sdfgi_irradiance_probes(p_render_buffers); +			} else { +				t = storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); +			} +			u.ids.push_back(t); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 8; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			if (render_buffers_is_sdfgi_enabled(p_render_buffers)) { +				u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers)); +			} else { +				u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 9; +			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +			u.ids.push_back(render_buffers_get_gi_probe_buffer(p_render_buffers)); +			uniforms.push_back(u); +		} +  		rb->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, 
default_shader_rd, RENDER_BUFFERS_UNIFORM_SET);  	}  } @@ -2946,6 +3140,7 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag  		if (is_using_radiance_cubemap_array()) {  			defines += "\n#define USE_RADIANCE_CUBEMAP_ARRAY \n";  		} +		defines += "\n#define SDFGI_OCT_SIZE " + itos(sdfgi_get_lightprobe_octahedron_size()) + "\n";  		uint32_t uniform_max_size = RD::get_singleton()->limit_get(RD::LIMIT_MAX_UNIFORM_BUFFER_SIZE); @@ -2977,36 +3172,6 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag  			scene_state.directional_light_buffer = RD::get_singleton()->uniform_buffer_create(directional_light_buffer_size);  			defines += "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(scene_state.max_directional_lights) + "\n";  		} - -		{ //giprobes -			int slot_count = gi_probe_get_slots().size(); -			if (gi_probe_is_anisotropic()) { -				slot_count *= 3; -				defines += "\n#define GI_PROBE_USE_ANISOTROPY\n"; -			} - -			if (gi_probe_get_quality() == GIPROBE_QUALITY_ULTRA_LOW) { -				defines += "\n#define GI_PROBE_LOW_QUALITY\n"; -			} else if (gi_probe_get_quality() == GIPROBE_QUALITY_HIGH) { -				defines += "\n#define GI_PROBE_HIGH_QUALITY\n"; -			} - -			defines += "\n#define MAX_GI_PROBE_TEXTURES " + itos(slot_count) + "\n"; - -			uint32_t giprobe_buffer_size; -			if (uniform_max_size < 65536) { -				//Yes, you guessed right, ARM again -				giprobe_buffer_size = uniform_max_size; -			} else { -				giprobe_buffer_size = 65536; -			} - -			giprobe_buffer_size = MIN(sizeof(GIProbeData) * gi_probe_get_slots().size(), giprobe_buffer_size); -			scene_state.max_gi_probes = giprobe_buffer_size / sizeof(GIProbeData); -			scene_state.gi_probes = memnew_arr(GIProbeData, scene_state.max_gi_probes); -			scene_state.gi_probe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GIProbeData) * scene_state.max_gi_probes); -			defines += "\n#define MAX_GI_PROBES " + itos(scene_state.max_gi_probes) + "\n"; -		}  		
{  			//lightmaps  			scene_state.max_lightmaps = storage->lightmap_array_get_size(); @@ -3036,13 +3201,13 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag  		Vector<String> shader_versions;  		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n");  		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n"); -		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL\n"); -		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL\n#define MODE_RENDER_ROUGHNESS\n"); +		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n"); +		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_GIPROBE\n");  		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n"); +		shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_SDF\n");  		shader_versions.push_back(""); +		shader_versions.push_back("\n#define USE_FORWARD_GI\n");  		shader_versions.push_back("\n#define MODE_MULTIPLE_RENDER_TARGETS\n"); -		shader_versions.push_back("\n#define USE_VOXEL_CONE_TRACING\n"); -		shader_versions.push_back("\n#define MODE_MULTIPLE_RENDER_TARGETS\n#define USE_VOXEL_CONE_TRACING\n");  		shader_versions.push_back("\n#define USE_LIGHTMAP\n");  		shader_versions.push_back("\n#define MODE_MULTIPLE_RENDER_TARGETS\n#define USE_LIGHTMAP\n");  		shader.scene_shader.initialize(shader_versions, defines); @@ -3109,7 +3274,7 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag  		actions.renames["SCREEN_UV"] = "screen_uv";  		actions.renames["SCREEN_TEXTURE"] = "color_buffer";  		actions.renames["DEPTH_TEXTURE"] = "depth_buffer"; -		actions.renames["NORMAL_TEXTURE"] = "normal_buffer"; +		actions.renames["NORMAL_ROUGHNESS_TEXTURE"] = "normal_roughness_buffer";  		actions.renames["DEPTH"] = 
"gl_FragDepth";  		actions.renames["OUTPUT_IS_SRGB"] = "true"; @@ -3219,6 +3384,7 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag  		MaterialData *md = (MaterialData *)storage->material_get_data(default_material, RasterizerStorageRD::SHADER_TYPE_3D);  		default_shader_rd = shader.scene_shader.version_get_shader(md->shader_data->version, SHADER_VERSION_COLOR_PASS); +		default_shader_sdfgi_rd = shader.scene_shader.version_get_shader(md->shader_data->version, SHADER_VERSION_DEPTH_PASS_WITH_SDF);  	}  	{ @@ -3268,7 +3434,7 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag  	{ //render buffers  		Vector<RD::Uniform> uniforms; -		for (int i = 0; i < 5; i++) { +		for (int i = 0; i < 7; i++) {  			RD::Uniform u;  			u.binding = i;  			u.type = RD::UNIFORM_TYPE_TEXTURE; @@ -3276,6 +3442,28 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag  			u.ids.push_back(texture);  			uniforms.push_back(u);  		} +		{ +			RD::Uniform u; +			u.binding = 7; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			RID texture = storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); +			u.ids.push_back(texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 8; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 9; +			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +			u.ids.push_back(render_buffers_get_default_gi_probe_buffer()); +			uniforms.push_back(u); +		}  		default_render_buffers_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_BUFFERS_UNIFORM_SET);  	} @@ -3307,7 +3495,6 @@ RasterizerSceneHighEndRD::~RasterizerSceneHighEndRD() {  	{  		RD::get_singleton()->free(scene_state.uniform_buffer);  		
RD::get_singleton()->free(scene_state.instance_buffer); -		RD::get_singleton()->free(scene_state.gi_probe_buffer);  		RD::get_singleton()->free(scene_state.directional_light_buffer);  		RD::get_singleton()->free(scene_state.light_buffer);  		RD::get_singleton()->free(scene_state.lightmap_buffer); @@ -3315,7 +3502,6 @@ RasterizerSceneHighEndRD::~RasterizerSceneHighEndRD() {  		RD::get_singleton()->free(scene_state.reflection_buffer);  		RD::get_singleton()->free(scene_state.decal_buffer);  		memdelete_arr(scene_state.instances); -		memdelete_arr(scene_state.gi_probes);  		memdelete_arr(scene_state.directional_lights);  		memdelete_arr(scene_state.lights);  		memdelete_arr(scene_state.lightmaps); @@ -3323,4 +3509,9 @@ RasterizerSceneHighEndRD::~RasterizerSceneHighEndRD() {  		memdelete_arr(scene_state.reflections);  		memdelete_arr(scene_state.decals);  	} + +	while (sdfgi_framebuffer_size_cache.front()) { +		RD::get_singleton()->free(sdfgi_framebuffer_size_cache.front()->get()); +		sdfgi_framebuffer_size_cache.erase(sdfgi_framebuffer_size_cache.front()); +	}  } diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.h b/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.h index 8438a4f730..cb03da48c1 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.h @@ -47,18 +47,23 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  		MATERIAL_UNIFORM_SET = 5  	}; +	enum { +		SDFGI_MAX_CASCADES = 8, +		MAX_GI_PROBES = 8 +	}; +  	/* Scene Shader */  	enum ShaderVersion {  		SHADER_VERSION_DEPTH_PASS,  		SHADER_VERSION_DEPTH_PASS_DP, -		SHADER_VERSION_DEPTH_PASS_WITH_NORMAL,  		SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS, +		SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE,  		SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL, +		SHADER_VERSION_DEPTH_PASS_WITH_SDF,  		SHADER_VERSION_COLOR_PASS, +		
SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI,  		SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR, -		SHADER_VERSION_VCT_COLOR_PASS, -		SHADER_VERSION_VCT_COLOR_PASS_WITH_SEPARATE_SPECULAR,  		SHADER_VERSION_LIGHTMAP_COLOR_PASS,  		SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR,  		SHADER_VERSION_MAX @@ -203,8 +208,11 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  		RID color;  		RID depth;  		RID specular; -		RID normal_buffer; -		RID roughness_buffer; +		RID normal_roughness_buffer; +		RID giprobe_buffer; + +		RID ambient_buffer; +		RID reflection_buffer;  		RS::ViewportMSAA msaa;  		RD::TextureSamples texture_samples; @@ -212,18 +220,22 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  		RID color_msaa;  		RID depth_msaa;  		RID specular_msaa; -		RID normal_buffer_msaa; +		RID normal_roughness_buffer_msaa;  		RID roughness_buffer_msaa; +		RID giprobe_buffer_msaa;  		RID depth_fb; -		RID depth_normal_fb;  		RID depth_normal_roughness_fb; +		RID depth_normal_roughness_giprobe_fb;  		RID color_fb;  		RID color_specular_fb;  		RID specular_only_fb;  		int width, height; +		RID render_sdfgi_uniform_set;  		void ensure_specular(); +		void ensure_gi(); +		void ensure_giprobe();  		void clear();  		virtual void configure(RID p_color_buffer, RID p_depth_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa); @@ -233,8 +245,7 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  	};  	virtual RenderBufferData *_create_render_buffer_data(); -	void _allocate_normal_texture(RenderBufferDataHighEnd *rb); -	void _allocate_roughness_texture(RenderBufferDataHighEnd *rb); +	void _allocate_normal_roughness_texture(RenderBufferDataHighEnd *rb);  	RID shadow_sampler;  	RID render_base_uniform_set; @@ -245,11 +256,12 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  	virtual void _base_uniforms_changed();  	void _render_buffers_clear_uniform_set(RenderBufferDataHighEnd *rb);  	virtual void 
_render_buffers_uniform_set_changed(RID p_render_buffers); -	virtual RID _render_buffers_get_roughness_texture(RID p_render_buffers);  	virtual RID _render_buffers_get_normal_texture(RID p_render_buffers); +	virtual RID _render_buffers_get_ambient_texture(RID p_render_buffers); +	virtual RID _render_buffers_get_reflection_texture(RID p_render_buffers);  	void _update_render_base_uniform_set(); -	void _setup_view_dependant_uniform_set(RID p_shadow_atlas, RID p_reflection_atlas); +	void _setup_view_dependant_uniform_set(RID p_shadow_atlas, RID p_reflection_atlas, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count);  	void _update_render_buffers_uniform_set(RID p_render_buffers);  	/* Scene State UBO */ @@ -260,7 +272,8 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  		float box_offset[3];  		uint32_t mask;  		float params[4]; // intensity, 0, interior , boxproject -		float ambient[4]; // ambient color, energy +		float ambient[3]; // ambient color, +		uint32_t ambient_mode;  		float local_matrix[16]; // up to here for spot and omni, rest is for directional  	}; @@ -315,22 +328,6 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  		float uv_scale4[2];  	}; -	struct GIProbeData { -		float xform[16]; -		float bounds[3]; -		float dynamic_range; - -		float bias; -		float normal_bias; -		uint32_t blend_ambient; -		uint32_t texture_slot; - -		float anisotropy_strength; -		float ao; -		float ao_size; -		uint32_t pad[1]; -	}; -  	struct LightmapData {  		float normal_xform[12];  	}; @@ -358,6 +355,8 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  	};  	enum { +		INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 6, +		INSTANCE_DATA_FLAG_USE_SDFGI = 1 << 7,  		INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE = 1 << 8,  		INSTANCE_DATA_FLAG_USE_LIGHTMAP = 1 << 9,  		INSTANCE_DATA_FLAG_USE_SH_LIGHTMAP = 1 << 10, @@ -430,10 +429,19 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  			float ssao_ao_affect;  			uint32_t 
roughness_limiter_enabled; +			float roughness_limiter_amount; +			float roughness_limiter_limit; +			uint32_t roughness_limiter_pad[2]; +  			float ao_color[4]; +			float sdf_to_bounds[16]; + +			int32_t sdf_offset[3];  			uint32_t material_uv2_mode; -			uint32_t pad_material[3]; + +			int32_t sdf_size[3]; +			uint32_t gi_upscale_for_msaa;  		};  		UBO ubo; @@ -445,11 +453,6 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  		RID reflection_buffer;  		uint32_t max_reflection_probes_per_instance; -		GIProbeData *gi_probes; -		uint32_t max_gi_probes; -		RID gi_probe_buffer; -		uint32_t max_gi_probe_probes_per_instance; -  		LightmapData *lightmaps;  		uint32_t max_lightmaps;  		RID lightmap_buffer; @@ -498,7 +501,7 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  					uint64_t material_index : 15;  					uint64_t shader_index : 12;  					uint64_t uses_instancing : 1; -					uint64_t uses_vct : 1; +					uint64_t uses_forward_gi : 1;  					uint64_t uses_lightmap : 1;  					uint64_t depth_layer : 4;  					uint64_t priority : 8; @@ -625,6 +628,7 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  	RID wireframe_material_shader;  	RID wireframe_material;  	RID default_shader_rd; +	RID default_shader_sdfgi_rd;  	RID default_radiance_uniform_set;  	RID default_render_buffers_uniform_set; @@ -640,30 +644,33 @@ class RasterizerSceneHighEndRD : public RasterizerSceneRD {  		PASS_MODE_SHADOW,  		PASS_MODE_SHADOW_DP,  		PASS_MODE_DEPTH, -		PASS_MODE_DEPTH_NORMAL,  		PASS_MODE_DEPTH_NORMAL_ROUGHNESS, +		PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE,  		PASS_MODE_DEPTH_MATERIAL, +		PASS_MODE_SDF,  	}; -	void _setup_environment(RID p_environment, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = 
false); +	void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);  	void _setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows);  	void _setup_decals(const RID *p_decal_instances, int p_decal_count, const Transform &p_camera_inverse_xform);  	void _setup_reflections(RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, const Transform &p_camera_inverse_transform, RID p_environment); -	void _setup_gi_probes(RID *p_gi_probe_probe_cull_result, int p_gi_probe_probe_cull_count, const Transform &p_camera_transform);  	void _setup_lightmaps(InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, const Transform &p_cam_transform); -	void _fill_instances(RenderList::Element **p_elements, int p_element_count, bool p_for_depth); +	void _fill_instances(RenderList::Element **p_elements, int p_element_count, bool p_for_depth, bool p_has_sdfgi = false, bool p_has_opaque_gi = false);  	void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderList::Element **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_radiance_uniform_set, RID p_render_buffers_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2()); -	_FORCE_INLINE_ void _add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index); -	_FORCE_INLINE_ void _add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode 
p_pass_mode, uint32_t p_geometry_index); +	_FORCE_INLINE_ void _add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false); +	_FORCE_INLINE_ void _add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false); + +	void _fill_render_list(InstanceBase **p_cull_result, int p_cull_count, PassMode p_pass_mode, bool p_using_sdfgi = false); -	void _fill_render_list(InstanceBase **p_cull_result, int p_cull_count, PassMode p_pass_mode, bool p_no_gi); +	Map<Size2i, RID> sdfgi_framebuffer_size_cache;  protected:  	virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID *p_light_cull_result, int p_light_cull_count, RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, RID *p_decal_cull_result, int p_decal_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color);  	virtual void _render_shadow(RID p_framebuffer, InstanceBase **p_cull_result, int p_cull_count, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake);  	virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region);  	virtual void _render_uv2(InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region); +	
virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, InstanceBase **p_cull_result, int p_cull_count, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture);  public:  	virtual void set_time(double p_time, double p_step); diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp index 689552be2f..8754fe6acb 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp @@ -32,8 +32,8 @@  #include "core/os/os.h"  #include "core/project_settings.h" +#include "rasterizer_rd.h"  #include "servers/rendering/rendering_server_raster.h" -  uint64_t RasterizerSceneRD::auto_exposure_counter = 2;  void get_vogel_disk(float *r_kernel, int p_sample_count) { @@ -195,6 +195,1541 @@ void RasterizerSceneRD::_update_reflection_mipmaps(ReflectionData &rd) {  	}  } +void RasterizerSceneRD::_sdfgi_erase(RenderBuffers *rb) { +	for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { +		const SDFGI::Cascade &c = rb->sdfgi->cascades[i]; +		RD::get_singleton()->free(c.light_data); +		RD::get_singleton()->free(c.light_aniso_0_tex); +		RD::get_singleton()->free(c.light_aniso_1_tex); +		RD::get_singleton()->free(c.sdf_tex); +		RD::get_singleton()->free(c.solid_cell_dispatch_buffer); +		RD::get_singleton()->free(c.solid_cell_buffer); +		RD::get_singleton()->free(c.lightprobe_history_tex); +		RD::get_singleton()->free(c.lightprobe_average_tex); +		RD::get_singleton()->free(c.lights_buffer); +	} + +	RD::get_singleton()->free(rb->sdfgi->render_albedo); +	RD::get_singleton()->free(rb->sdfgi->render_emission); +	RD::get_singleton()->free(rb->sdfgi->render_emission_aniso); + +	RD::get_singleton()->free(rb->sdfgi->render_sdf[0]); +	RD::get_singleton()->free(rb->sdfgi->render_sdf[1]); + +	
RD::get_singleton()->free(rb->sdfgi->render_sdf_half[0]); +	RD::get_singleton()->free(rb->sdfgi->render_sdf_half[1]); + +	for (int i = 0; i < 8; i++) { +		RD::get_singleton()->free(rb->sdfgi->render_occlusion[i]); +	} + +	RD::get_singleton()->free(rb->sdfgi->render_geom_facing); + +	RD::get_singleton()->free(rb->sdfgi->lightprobe_data); +	RD::get_singleton()->free(rb->sdfgi->lightprobe_history_scroll); +	RD::get_singleton()->free(rb->sdfgi->occlusion_data); + +	RD::get_singleton()->free(rb->sdfgi->cascades_ubo); + +	memdelete(rb->sdfgi); + +	rb->sdfgi = nullptr; +} + +const Vector3i RasterizerSceneRD::SDFGI::Cascade::DIRTY_ALL = Vector3i(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF); + +void RasterizerSceneRD::sdfgi_update(RID p_render_buffers, RID p_environment, const Vector3 &p_world_position) { +	Environent *env = environment_owner.getornull(p_environment); +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	bool needs_sdfgi = env && env->sdfgi_enabled; + +	if (!needs_sdfgi) { +		if (rb->sdfgi != nullptr) { +			//erase it +			_sdfgi_erase(rb); +			_render_buffers_uniform_set_changed(p_render_buffers); +		} +		return; +	} + +	static const uint32_t history_frames_to_converge[RS::ENV_SDFGI_CONVERGE_MAX] = { 5, 10, 15, 20, 25, 30 }; +	uint32_t requested_history_size = history_frames_to_converge[sdfgi_frames_to_converge]; + +	if (rb->sdfgi && (rb->sdfgi->cascade_mode != env->sdfgi_cascades || rb->sdfgi->min_cell_size != env->sdfgi_min_cell_size || requested_history_size != rb->sdfgi->history_size || rb->sdfgi->uses_occlusion != env->sdfgi_use_occlusion || rb->sdfgi->y_scale_mode != env->sdfgi_y_scale)) { +		//configuration changed, erase +		_sdfgi_erase(rb); +	} + +	SDFGI *sdfgi = rb->sdfgi; +	if (sdfgi == nullptr) { +		//re-create +		rb->sdfgi = memnew(SDFGI); +		sdfgi = rb->sdfgi; +		sdfgi->cascade_mode = env->sdfgi_cascades; +		sdfgi->min_cell_size = env->sdfgi_min_cell_size; +		sdfgi->uses_occlusion = env->sdfgi_use_occlusion; +		sdfgi->y_scale_mode = 
env->sdfgi_y_scale; +		static const float y_scale[3] = { 1.0, 1.5, 2.0 }; +		sdfgi->y_mult = y_scale[sdfgi->y_scale_mode]; +		static const int cascasde_size[3] = { 4, 6, 8 }; +		sdfgi->cascades.resize(cascasde_size[sdfgi->cascade_mode]); +		sdfgi->probe_axis_count = SDFGI::PROBE_DIVISOR + 1; +		sdfgi->solid_cell_ratio = sdfgi_solid_cell_ratio; +		sdfgi->solid_cell_count = uint32_t(float(sdfgi->cascade_size * sdfgi->cascade_size * sdfgi->cascade_size) * sdfgi->solid_cell_ratio); + +		float base_cell_size = sdfgi->min_cell_size; + +		RD::TextureFormat tf_sdf; +		tf_sdf.format = RD::DATA_FORMAT_R8_UNORM; +		tf_sdf.width = sdfgi->cascade_size; // Always 64x64 +		tf_sdf.height = sdfgi->cascade_size; +		tf_sdf.depth = sdfgi->cascade_size; +		tf_sdf.type = RD::TEXTURE_TYPE_3D; +		tf_sdf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + +		{ +			RD::TextureFormat tf_render = tf_sdf; +			tf_render.format = RD::DATA_FORMAT_R16_UINT; +			sdfgi->render_albedo = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); +			tf_render.format = RD::DATA_FORMAT_R32_UINT; +			sdfgi->render_emission = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); +			sdfgi->render_emission_aniso = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + +			tf_render.format = RD::DATA_FORMAT_R8_UNORM; //at least its easy to visualize + +			for (int i = 0; i < 8; i++) { +				sdfgi->render_occlusion[i] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); +			} + +			tf_render.format = RD::DATA_FORMAT_R32_UINT; +			sdfgi->render_geom_facing = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); + +			tf_render.format = RD::DATA_FORMAT_R8G8B8A8_UINT; +			sdfgi->render_sdf[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); +			sdfgi->render_sdf[1] = RD::get_singleton()->texture_create(tf_render, 
RD::TextureView()); + +			tf_render.width /= 2; +			tf_render.height /= 2; +			tf_render.depth /= 2; + +			sdfgi->render_sdf_half[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); +			sdfgi->render_sdf_half[1] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); +		} + +		RD::TextureFormat tf_occlusion = tf_sdf; +		tf_occlusion.format = RD::DATA_FORMAT_R16_UINT; +		tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R16_UINT); +		tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16); +		tf_occlusion.depth *= sdfgi->cascades.size(); //use depth for occlusion slices +		tf_occlusion.width *= 2; //use width for the other half + +		RD::TextureFormat tf_light = tf_sdf; +		tf_light.format = RD::DATA_FORMAT_R32_UINT; +		tf_light.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT); +		tf_light.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32); + +		RD::TextureFormat tf_aniso0 = tf_sdf; +		tf_aniso0.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; +		RD::TextureFormat tf_aniso1 = tf_sdf; +		tf_aniso1.format = RD::DATA_FORMAT_R8G8_UNORM; + +		int passes = nearest_shift(sdfgi->cascade_size) - 1; + +		//store lightprobe SH +		RD::TextureFormat tf_probes; +		tf_probes.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; +		tf_probes.width = sdfgi->probe_axis_count * sdfgi->probe_axis_count; +		tf_probes.height = sdfgi->probe_axis_count * SDFGI::SH_SIZE; +		tf_probes.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; +		tf_probes.type = RD::TEXTURE_TYPE_2D_ARRAY; + +		sdfgi->history_size = requested_history_size; + +		RD::TextureFormat tf_probe_history = tf_probes; +		tf_probe_history.format = RD::DATA_FORMAT_R16G16B16A16_SINT; //signed integer because SH are signed +		tf_probe_history.array_layers = sdfgi->history_size; + +		RD::TextureFormat tf_probe_average = tf_probes; +		tf_probe_average.format = 
RD::DATA_FORMAT_R32G32B32A32_SINT; //signed integer because SH are signed +		tf_probe_average.type = RD::TEXTURE_TYPE_2D_ARRAY; +		tf_probe_average.array_layers = 1; + +		sdfgi->lightprobe_history_scroll = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView()); +		sdfgi->lightprobe_average_scroll = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView()); + +		{ +			//octahedral lightprobes +			RD::TextureFormat tf_octprobes = tf_probes; +			tf_octprobes.array_layers = sdfgi->cascades.size() * 2; +			tf_octprobes.format = RD::DATA_FORMAT_R32_UINT; //pack well with RGBE +			tf_octprobes.width = sdfgi->probe_axis_count * sdfgi->probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2); +			tf_octprobes.height = sdfgi->probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2); +			tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT); +			tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32); +			//lightprobe texture is an octahedral texture + +			sdfgi->lightprobe_data = RD::get_singleton()->texture_create(tf_octprobes, RD::TextureView()); +			RD::TextureView tv; +			tv.format_override = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; +			sdfgi->lightprobe_texture = RD::get_singleton()->texture_create_shared(tv, sdfgi->lightprobe_data); +		} + +		sdfgi->cascades_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES); + +		sdfgi->occlusion_data = RD::get_singleton()->texture_create(tf_occlusion, RD::TextureView()); +		{ +			RD::TextureView tv; +			tv.format_override = RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16; +			sdfgi->occlusion_texture = RD::get_singleton()->texture_create_shared(tv, sdfgi->occlusion_data); +		} + +		for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { +			SDFGI::Cascade &cascade = sdfgi->cascades[i]; + +			/* 3D Textures */ + +			cascade.sdf_tex = RD::get_singleton()->texture_create(tf_sdf, RD::TextureView()); + +			cascade.light_data = 
RD::get_singleton()->texture_create(tf_light, RD::TextureView()); + +			cascade.light_aniso_0_tex = RD::get_singleton()->texture_create(tf_aniso0, RD::TextureView()); +			cascade.light_aniso_1_tex = RD::get_singleton()->texture_create(tf_aniso1, RD::TextureView()); + +			{ +				RD::TextureView tv; +				tv.format_override = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; +				cascade.light_tex = RD::get_singleton()->texture_create_shared(tv, cascade.light_data); + +				RD::get_singleton()->texture_clear(cascade.light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); +				RD::get_singleton()->texture_clear(cascade.light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); +				RD::get_singleton()->texture_clear(cascade.light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); +			} + +			cascade.cell_size = base_cell_size; +			Vector3 world_position = p_world_position; +			world_position.y *= sdfgi->y_mult; +			int32_t probe_cells = sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; +			Vector3 probe_size = Vector3(1, 1, 1) * cascade.cell_size * probe_cells; +			Vector3i probe_pos = Vector3i((world_position / probe_size + Vector3(0.5, 0.5, 0.5)).floor()); +			cascade.position = probe_pos * probe_cells; + +			cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; + +			base_cell_size *= 2.0; + +			/* Probe History */ + +			cascade.lightprobe_history_tex = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView()); +			RD::get_singleton()->texture_clear(cascade.lightprobe_history_tex, Color(0, 0, 0, 0), 0, 1, 0, tf_probe_history.array_layers); //needs to be cleared for average to work + +			cascade.lightprobe_average_tex = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView()); +			RD::get_singleton()->texture_clear(cascade.lightprobe_average_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); //needs to be cleared for average to work + +			/* Buffers */ + +			cascade.solid_cell_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGI::Cascade::SolidCell) * sdfgi->solid_cell_count); +			
cascade.solid_cell_dispatch_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4, Vector<uint8_t>(), RD::STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); +			cascade.lights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDGIShader::Light) * MAX(SDFGI::MAX_STATIC_LIGHTS, SDFGI::MAX_DYNAMIC_LIGHTS)); +			{ +				Vector<RD::Uniform> uniforms; +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 1; +					u.ids.push_back(sdfgi->render_sdf[(passes & 1) ? 1 : 0]); //if passes are even, we read from buffer 0, else we read from buffer 1 +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 2; +					u.ids.push_back(sdfgi->render_albedo); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 3; +					for (int j = 0; j < 8; j++) { +						u.ids.push_back(sdfgi->render_occlusion[j]); +					} +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 4; +					u.ids.push_back(sdfgi->render_emission); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 5; +					u.ids.push_back(sdfgi->render_emission_aniso); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 6; +					u.ids.push_back(sdfgi->render_geom_facing); +					uniforms.push_back(u); +				} + +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 7; +					u.ids.push_back(cascade.sdf_tex); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 8; +					u.ids.push_back(sdfgi->occlusion_data); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; +					u.binding = 10; +					
u.ids.push_back(cascade.solid_cell_dispatch_buffer); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; +					u.binding = 11; +					u.ids.push_back(cascade.solid_cell_buffer); +					uniforms.push_back(u); +				} + +				cascade.sdf_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_STORE), 0); +			} + +			{ +				Vector<RD::Uniform> uniforms; +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 1; +					u.ids.push_back(sdfgi->render_albedo); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 2; +					u.ids.push_back(sdfgi->render_geom_facing); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 3; +					u.ids.push_back(sdfgi->render_emission); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 4; +					u.ids.push_back(sdfgi->render_emission_aniso); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; +					u.binding = 5; +					u.ids.push_back(cascade.solid_cell_dispatch_buffer); +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; +					u.binding = 6; +					u.ids.push_back(cascade.solid_cell_buffer); +					uniforms.push_back(u); +				} + +				cascade.scroll_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_SCROLL), 0); +			} +			{ +				Vector<RD::Uniform> uniforms; +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 1; +					for (int j = 0; j < 8; j++) { +						
u.ids.push_back(sdfgi->render_occlusion[j]); +					} +					uniforms.push_back(u); +				} +				{ +					RD::Uniform u; +					u.type = RD::UNIFORM_TYPE_IMAGE; +					u.binding = 2; +					u.ids.push_back(sdfgi->occlusion_data); +					uniforms.push_back(u); +				} + +				cascade.scroll_occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_SCROLL_OCCLUSION), 0); +			} +		} + +		//direct light +		for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { +			SDFGI::Cascade &cascade = sdfgi->cascades[i]; + +			Vector<RD::Uniform> uniforms; +			{ +				RD::Uniform u; +				u.binding = 1; +				u.type = RD::UNIFORM_TYPE_TEXTURE; +				for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +					if (j < rb->sdfgi->cascades.size()) { +						u.ids.push_back(rb->sdfgi->cascades[j].sdf_tex); +					} else { +						u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +					} +				} +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 2; +				u.type = RD::UNIFORM_TYPE_SAMPLER; +				u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 3; +				u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; +				u.ids.push_back(cascade.solid_cell_dispatch_buffer); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 4; +				u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; +				u.ids.push_back(cascade.solid_cell_buffer); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 5; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.ids.push_back(cascade.light_data); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 6; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.ids.push_back(cascade.light_aniso_0_tex); +				
uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 7; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.ids.push_back(cascade.light_aniso_1_tex); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 8; +				u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +				u.ids.push_back(rb->sdfgi->cascades_ubo); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 9; +				u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; +				u.ids.push_back(cascade.lights_buffer); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 10; +				u.type = RD::UNIFORM_TYPE_TEXTURE; +				u.ids.push_back(rb->sdfgi->lightprobe_texture); +				uniforms.push_back(u); +			} + +			cascade.sdf_direct_light_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.direct_light.version_get_shader(sdfgi_shader.direct_light_shader, 0), 0); +		} + +		//preprocess initialize uniform set +		{ +			Vector<RD::Uniform> uniforms; +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 1; +				u.ids.push_back(sdfgi->render_albedo); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 2; +				u.ids.push_back(sdfgi->render_sdf[0]); +				uniforms.push_back(u); +			} + +			sdfgi->sdf_initialize_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE), 0); +		} + +		{ +			Vector<RD::Uniform> uniforms; +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 1; +				u.ids.push_back(sdfgi->render_albedo); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 2; +				u.ids.push_back(sdfgi->render_sdf_half[0]); +				uniforms.push_back(u); +			} + +			sdfgi->sdf_initialize_half_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, 
sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF), 0); +		} + +		//jump flood uniform set +		{ +			Vector<RD::Uniform> uniforms; +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 1; +				u.ids.push_back(sdfgi->render_sdf[0]); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 2; +				u.ids.push_back(sdfgi->render_sdf[1]); +				uniforms.push_back(u); +			} + +			sdfgi->jump_flood_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); +			SWAP(uniforms.write[0].ids.write[0], uniforms.write[1].ids.write[0]); +			sdfgi->jump_flood_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); +		} +		//jump flood half uniform set +		{ +			Vector<RD::Uniform> uniforms; +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 1; +				u.ids.push_back(sdfgi->render_sdf_half[0]); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 2; +				u.ids.push_back(sdfgi->render_sdf_half[1]); +				uniforms.push_back(u); +			} + +			sdfgi->jump_flood_half_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); +			SWAP(uniforms.write[0].ids.write[0], uniforms.write[1].ids.write[0]); +			sdfgi->jump_flood_half_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD), 0); +		} + +		//upscale half size sdf +		{ +			Vector<RD::Uniform> uniforms; +			
{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 1; +				u.ids.push_back(sdfgi->render_albedo); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 2; +				u.ids.push_back(sdfgi->render_sdf_half[(passes & 1) ? 0 : 1]); //reverse pass order because half size +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 3; +				u.ids.push_back(sdfgi->render_sdf[(passes & 1) ? 0 : 1]); //reverse pass order because it needs an extra JFA pass +				uniforms.push_back(u); +			} + +			sdfgi->upscale_jfa_uniform_set_index = (passes & 1) ? 0 : 1; +			sdfgi->sdf_upscale_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE), 0); +		} + +		//occlusion uniform set +		{ +			Vector<RD::Uniform> uniforms; +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 1; +				u.ids.push_back(sdfgi->render_albedo); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 2; +				for (int i = 0; i < 8; i++) { +					u.ids.push_back(sdfgi->render_occlusion[i]); +				} +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 3; +				u.ids.push_back(sdfgi->render_geom_facing); +				uniforms.push_back(u); +			} + +			sdfgi->occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, SDGIShader::PRE_PROCESS_OCCLUSION), 0); +		} + +		for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { +			//integrate uniform + +			Vector<RD::Uniform> uniforms; + +			{ +				RD::Uniform u; +				u.binding = 1; +				u.type = RD::UNIFORM_TYPE_TEXTURE; +				for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +					if (j < 
sdfgi->cascades.size()) { +						u.ids.push_back(sdfgi->cascades[j].sdf_tex); +					} else { +						u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +					} +				} +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 2; +				u.type = RD::UNIFORM_TYPE_TEXTURE; +				for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +					if (j < sdfgi->cascades.size()) { +						u.ids.push_back(sdfgi->cascades[j].light_tex); +					} else { +						u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +					} +				} +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 3; +				u.type = RD::UNIFORM_TYPE_TEXTURE; +				for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +					if (j < sdfgi->cascades.size()) { +						u.ids.push_back(sdfgi->cascades[j].light_aniso_0_tex); +					} else { +						u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +					} +				} +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.binding = 4; +				u.type = RD::UNIFORM_TYPE_TEXTURE; +				for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +					if (j < sdfgi->cascades.size()) { +						u.ids.push_back(sdfgi->cascades[j].light_aniso_1_tex); +					} else { +						u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +					} +				} +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_SAMPLER; +				u.binding = 6; +				u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +				uniforms.push_back(u); +			} + +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +				u.binding = 7; +				u.ids.push_back(sdfgi->cascades_ubo); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 
8; +				u.ids.push_back(sdfgi->lightprobe_data); +				uniforms.push_back(u); +			} + +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 9; +				u.ids.push_back(sdfgi->cascades[i].lightprobe_history_tex); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 10; +				u.ids.push_back(sdfgi->cascades[i].lightprobe_average_tex); +				uniforms.push_back(u); +			} + +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 11; +				u.ids.push_back(sdfgi->lightprobe_history_scroll); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 12; +				u.ids.push_back(sdfgi->lightprobe_average_scroll); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_IMAGE; +				u.binding = 13; +				RID parent_average; +				if (i < sdfgi->cascades.size() - 1) { +					parent_average = sdfgi->cascades[i + 1].lightprobe_average_tex; +				} else { +					parent_average = sdfgi->cascades[i - 1].lightprobe_average_tex; //to use something, but it wont be used +				} +				u.ids.push_back(parent_average); +				uniforms.push_back(u); +			} + +			sdfgi->cascades[i].integrate_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 0); +		} + +		sdfgi->uses_multibounce = env->sdfgi_use_multibounce; +		sdfgi->energy = env->sdfgi_energy; +		sdfgi->normal_bias = env->sdfgi_normal_bias; +		sdfgi->probe_bias = env->sdfgi_probe_bias; +		sdfgi->reads_sky = env->sdfgi_read_sky_light; + +		_render_buffers_uniform_set_changed(p_render_buffers); + +		return; //done. 
all levels will need to be rendered which its going to take a bit +	} + +	//check for updates + +	sdfgi->uses_multibounce = env->sdfgi_use_multibounce; +	sdfgi->energy = env->sdfgi_energy; +	sdfgi->normal_bias = env->sdfgi_normal_bias; +	sdfgi->probe_bias = env->sdfgi_probe_bias; +	sdfgi->reads_sky = env->sdfgi_read_sky_light; + +	int32_t drag_margin = (sdfgi->cascade_size / SDFGI::PROBE_DIVISOR) / 2; + +	for (uint32_t i = 0; i < sdfgi->cascades.size(); i++) { +		SDFGI::Cascade &cascade = sdfgi->cascades[i]; +		cascade.dirty_regions = Vector3i(); + +		Vector3 probe_half_size = Vector3(1, 1, 1) * cascade.cell_size * float(sdfgi->cascade_size / SDFGI::PROBE_DIVISOR) * 0.5; +		probe_half_size = Vector3(0, 0, 0); + +		Vector3 world_position = p_world_position; +		world_position.y *= sdfgi->y_mult; +		Vector3i pos_in_cascade = Vector3i((world_position + probe_half_size) / cascade.cell_size); + +		for (int j = 0; j < 3; j++) { +			if (pos_in_cascade[j] < cascade.position[j]) { +				while (pos_in_cascade[j] < (cascade.position[j] - drag_margin)) { +					cascade.position[j] -= drag_margin * 2; +					cascade.dirty_regions[j] += drag_margin * 2; +				} +			} else if (pos_in_cascade[j] > cascade.position[j]) { +				while (pos_in_cascade[j] > (cascade.position[j] + drag_margin)) { +					cascade.position[j] += drag_margin * 2; +					cascade.dirty_regions[j] -= drag_margin * 2; +				} +			} + +			if (cascade.dirty_regions[j] == 0) { +				continue; // not dirty +			} else if (uint32_t(ABS(cascade.dirty_regions[j])) >= sdfgi->cascade_size) { +				//moved too much, just redraw everything (make all dirty) +				cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; +				break; +			} +		} + +		if (cascade.dirty_regions != Vector3i() && cascade.dirty_regions != SDFGI::Cascade::DIRTY_ALL) { +			//see how much the total dirty volume represents from the total volume +			uint32_t total_volume = sdfgi->cascade_size * sdfgi->cascade_size * sdfgi->cascade_size; +			uint32_t safe_volume = 1; +			
for (int j = 0; j < 3; j++) { +				safe_volume *= sdfgi->cascade_size - ABS(cascade.dirty_regions[j]); +			} +			uint32_t dirty_volume = total_volume - safe_volume; +			if (dirty_volume > (safe_volume / 2)) { +				//more than half the volume is dirty, make all dirty so its only rendered once +				cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; +			} +		} +	} +} + +int RasterizerSceneRD::sdfgi_get_pending_region_count(RID p_render_buffers) const { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + +	ERR_FAIL_COND_V(rb == nullptr, 0); + +	if (rb->sdfgi == nullptr) { +		return 0; +	} + +	int dirty_count = 0; +	for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { +		const SDFGI::Cascade &c = rb->sdfgi->cascades[i]; + +		if (c.dirty_regions == SDFGI::Cascade::DIRTY_ALL) { +			dirty_count++; +		} else { +			for (int j = 0; j < 3; j++) { +				if (c.dirty_regions[j] != 0) { +					dirty_count++; +				} +			} +		} +	} + +	return dirty_count; +} + +int RasterizerSceneRD::_sdfgi_get_pending_region_data(RID p_render_buffers, int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(rb == nullptr, -1); +	ERR_FAIL_COND_V(rb->sdfgi == nullptr, -1); + +	int dirty_count = 0; +	for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { +		const SDFGI::Cascade &c = rb->sdfgi->cascades[i]; + +		if (c.dirty_regions == SDFGI::Cascade::DIRTY_ALL) { +			if (dirty_count == p_region) { +				r_local_offset = Vector3i(); +				r_local_size = Vector3i(1, 1, 1) * rb->sdfgi->cascade_size; + +				r_bounds.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + c.position)) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); +				r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); +				return i; +			} +			dirty_count++; +		} else { +			for (int j = 0; j < 3; j++) { +				if 
(c.dirty_regions[j] != 0) { +					if (dirty_count == p_region) { +						Vector3i from = Vector3i(0, 0, 0); +						Vector3i to = Vector3i(1, 1, 1) * rb->sdfgi->cascade_size; + +						if (c.dirty_regions[j] > 0) { +							//fill from the beginning +							to[j] = c.dirty_regions[j]; +						} else { +							//fill from the end +							from[j] = to[j] + c.dirty_regions[j]; +						} + +						for (int k = 0; k < j; k++) { +							// "chip" away previous regions to avoid re-voxelizing the same thing +							if (c.dirty_regions[k] > 0) { +								from[k] += c.dirty_regions[k]; +							} else if (c.dirty_regions[k] < 0) { +								to[k] += c.dirty_regions[k]; +							} +						} + +						r_local_offset = from; +						r_local_size = to - from; + +						r_bounds.position = Vector3(from + Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + c.position) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); +						r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / rb->sdfgi->y_mult, 1); + +						return i; +					} + +					dirty_count++; +				} +			} +		} +	} +	return -1; +} + +AABB RasterizerSceneRD::sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const { +	AABB bounds; +	Vector3i from; +	Vector3i size; + +	int c = _sdfgi_get_pending_region_data(p_render_buffers, p_region, from, size, bounds); +	ERR_FAIL_COND_V(c == -1, AABB()); +	return bounds; +} + +uint32_t RasterizerSceneRD::sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const { +	AABB bounds; +	Vector3i from; +	Vector3i size; + +	return _sdfgi_get_pending_region_data(p_render_buffers, p_region, from, size, bounds); +} + +void RasterizerSceneRD::_sdfgi_update_cascades(RID p_render_buffers) { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND(rb == nullptr); +	if (rb->sdfgi == nullptr) { +		return; +	} + +	//update cascades +	SDFGI::Cascade::UBO cascade_data[SDFGI::MAX_CASCADES]; +	int32_t probe_divisor = 
rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + +	for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { +		Vector3 pos = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[i].position)) * rb->sdfgi->cascades[i].cell_size; + +		cascade_data[i].offset[0] = pos.x; +		cascade_data[i].offset[1] = pos.y; +		cascade_data[i].offset[2] = pos.z; +		cascade_data[i].to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; +		cascade_data[i].probe_offset[0] = rb->sdfgi->cascades[i].position.x / probe_divisor; +		cascade_data[i].probe_offset[1] = rb->sdfgi->cascades[i].position.y / probe_divisor; +		cascade_data[i].probe_offset[2] = rb->sdfgi->cascades[i].position.z / probe_divisor; +		cascade_data[i].pad = 0; +	} + +	RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, true); +} + +void RasterizerSceneRD::sdfgi_update_probes(RID p_render_buffers, RID p_environment, const RID *p_directional_light_instances, uint32_t p_directional_light_count, const RID *p_positional_light_instances, uint32_t p_positional_light_count) { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND(rb == nullptr); +	if (rb->sdfgi == nullptr) { +		return; +	} +	Environent *env = environment_owner.getornull(p_environment); + +	RENDER_TIMESTAMP(">SDFGI Update Probes"); + +	/* Update Cascades UBO */ +	_sdfgi_update_cascades(p_render_buffers); +	/* Update Dynamic Lights Buffer */ + +	RENDER_TIMESTAMP("Update Lights"); + +	/* Update dynamic lights */ + +	{ +		RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); +		RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); + +		SDGIShader::DirectLightPushConstant push_constant; + +		push_constant.grid_size[0] = rb->sdfgi->cascade_size; +		push_constant.grid_size[1] = rb->sdfgi->cascade_size; +		
push_constant.grid_size[2] = rb->sdfgi->cascade_size; +		push_constant.max_cascades = rb->sdfgi->cascades.size(); +		push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; +		push_constant.multibounce = rb->sdfgi->uses_multibounce; +		push_constant.y_mult = rb->sdfgi->y_mult; + +		push_constant.process_offset = 0; +		push_constant.process_increment = 1; + +		for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { +			SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + +			{ //fill light buffer + +				SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; +				uint32_t idx = 0; +				for (uint32_t j = 0; j < p_directional_light_count; j++) { +					if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { +						break; +					} + +					LightInstance *li = light_instance_owner.getornull(p_directional_light_instances[j]); +					ERR_CONTINUE(!li); +					Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); +					dir.y *= rb->sdfgi->y_mult; +					dir.normalize(); +					lights[idx].direction[0] = dir.x; +					lights[idx].direction[1] = dir.y; +					lights[idx].direction[2] = dir.z; +					Color color = storage->light_get_color(li->light); +					color = color.to_linear(); +					lights[idx].color[0] = color.r; +					lights[idx].color[1] = color.g; +					lights[idx].color[2] = color.b; +					lights[idx].type = RS::LIGHT_DIRECTIONAL; +					lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); +					lights[idx].has_shadow = storage->light_has_shadow(li->light); + +					idx++; +				} + +				AABB cascade_aabb; +				cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; +				cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; + +				for (uint32_t j = 0; j < p_positional_light_count; j++) { +					if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { +						break; +					} + +					LightInstance *li = light_instance_owner.getornull(p_positional_light_instances[j]); 
+					ERR_CONTINUE(!li); + +					uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); +					if (i > max_sdfgi_cascade) { +						continue; +					} + +					if (!cascade_aabb.intersects(li->aabb)) { +						continue; +					} + +					Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); +					//faster to not do this here +					//dir.y *= rb->sdfgi->y_mult; +					//dir.normalize(); +					lights[idx].direction[0] = dir.x; +					lights[idx].direction[1] = dir.y; +					lights[idx].direction[2] = dir.z; +					Vector3 pos = li->transform.origin; +					pos.y *= rb->sdfgi->y_mult; +					lights[idx].position[0] = pos.x; +					lights[idx].position[1] = pos.y; +					lights[idx].position[2] = pos.z; +					Color color = storage->light_get_color(li->light); +					color = color.to_linear(); +					lights[idx].color[0] = color.r; +					lights[idx].color[1] = color.g; +					lights[idx].color[2] = color.b; +					lights[idx].type = storage->light_get_type(li->light); +					lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); +					lights[idx].has_shadow = storage->light_has_shadow(li->light); +					lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); +					lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); +					lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); +					lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + +					idx++; +				} + +				if (idx > 0) { +					RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, true); +				} +				push_constant.light_count = idx; +			} + +			push_constant.cascade = i; + +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); +			RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, 
sizeof(SDGIShader::DirectLightPushConstant)); +			RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); +		} +		RD::get_singleton()->compute_list_end(); +	} + +	RENDER_TIMESTAMP("Raytrace"); + +	SDGIShader::IntegratePushConstant push_constant; +	push_constant.grid_size[1] = rb->sdfgi->cascade_size; +	push_constant.grid_size[2] = rb->sdfgi->cascade_size; +	push_constant.grid_size[0] = rb->sdfgi->cascade_size; +	push_constant.max_cascades = rb->sdfgi->cascades.size(); +	push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; +	push_constant.history_index = rb->sdfgi->render_pass % rb->sdfgi->history_size; +	push_constant.history_size = rb->sdfgi->history_size; +	static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 8, 16, 32, 64, 96, 128 }; +	push_constant.ray_count = ray_count[sdfgi_ray_count]; +	push_constant.ray_bias = rb->sdfgi->probe_bias; +	push_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; +	push_constant.image_size[1] = rb->sdfgi->probe_axis_count; + +	RID sky_uniform_set = sdfgi_shader.integrate_default_sky_uniform_set; +	push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_DISABLED; +	push_constant.y_mult = rb->sdfgi->y_mult; + +	if (rb->sdfgi->reads_sky && env) { +		push_constant.sky_energy = env->bg_energy; + +		if (env->background == RS::ENV_BG_CLEAR_COLOR) { +			push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_COLOR; +			Color c = storage->get_default_clear_color().to_linear(); +			push_constant.sky_color[0] = c.r; +			push_constant.sky_color[1] = c.g; +			push_constant.sky_color[2] = c.b; +		} else if (env->background == RS::ENV_BG_COLOR) { +			push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_COLOR; +			Color c = env->bg_color; +			push_constant.sky_color[0] = c.r; +			push_constant.sky_color[1] = c.g; +			push_constant.sky_color[2] = c.b; + +		} else if (env->background == RS::ENV_BG_SKY) { +	
		Sky *sky = sky_owner.getornull(env->sky); +			if (sky && sky->radiance.is_valid()) { +				if (sky->sdfgi_integrate_sky_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(sky->sdfgi_integrate_sky_uniform_set)) { +					Vector<RD::Uniform> uniforms; + +					{ +						RD::Uniform u; +						u.type = RD::UNIFORM_TYPE_TEXTURE; +						u.binding = 0; +						u.ids.push_back(sky->radiance); +						uniforms.push_back(u); +					} + +					{ +						RD::Uniform u; +						u.type = RD::UNIFORM_TYPE_SAMPLER; +						u.binding = 1; +						u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +						uniforms.push_back(u); +					} + +					sky->sdfgi_integrate_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); +				} +				sky_uniform_set = sky->sdfgi_integrate_sky_uniform_set; +				push_constant.sky_mode = SDGIShader::IntegratePushConstant::SKY_MODE_SKY; +			} +		} +	} + +	rb->sdfgi->render_pass++; + +	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); +	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_PROCESS]); + +	int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; +	for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { +		push_constant.cascade = i; +		push_constant.world_offset[0] = rb->sdfgi->cascades[i].position.x / probe_divisor; +		push_constant.world_offset[1] = rb->sdfgi->cascades[i].position.y / probe_divisor; +		push_constant.world_offset[2] = rb->sdfgi->cascades[i].position.z / probe_divisor; + +		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[i].integrate_uniform_set, 0); +		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sky_uniform_set, 1); + +		
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); +		RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); +	} + +	RD::get_singleton()->compute_list_add_barrier(compute_list); //wait until done + +	// Then store values into the lightprobe texture. Separating these steps has a small performance hit, but it allows for multiple bounces +	RENDER_TIMESTAMP("Average Probes"); + +	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_STORE]); + +	//convert to octahedral to store +	push_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; +	push_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; + +	for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { +		push_constant.cascade = i; +		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[i].integrate_uniform_set, 0); +		RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); +		RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1, 8, 8, 1); +	} + +	RD::get_singleton()->compute_list_end(); + +	RENDER_TIMESTAMP("<SDFGI Update Probes"); +} + +void RasterizerSceneRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count) { +	RENDER_TIMESTAMP("Render GI"); + +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND(rb == nullptr); +	Environent *env = 
environment_owner.getornull(p_environment); + +	GI::PushConstant push_constant; + +	push_constant.screen_size[0] = rb->width; +	push_constant.screen_size[1] = rb->height; +	push_constant.z_near = p_projection.get_z_near(); +	push_constant.z_far = p_projection.get_z_far(); +	push_constant.orthogonal = p_projection.is_orthogonal(); +	push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); +	push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); +	push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; +	push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; +	push_constant.max_giprobes = MIN(RenderBuffers::MAX_GIPROBES, p_gi_probe_cull_count); +	push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; +	push_constant.use_sdfgi = rb->sdfgi != nullptr; + +	if (env) { +		push_constant.ao_color[0] = env->ao_color.r; +		push_constant.ao_color[1] = env->ao_color.g; +		push_constant.ao_color[2] = env->ao_color.b; +	} else { +		push_constant.ao_color[0] = 0; +		push_constant.ao_color[1] = 0; +		push_constant.ao_color[2] = 0; +	} + +	push_constant.cam_rotation[0] = p_transform.basis[0][0]; +	push_constant.cam_rotation[1] = p_transform.basis[1][0]; +	push_constant.cam_rotation[2] = p_transform.basis[2][0]; +	push_constant.cam_rotation[3] = 0; +	push_constant.cam_rotation[4] = p_transform.basis[0][1]; +	push_constant.cam_rotation[5] = p_transform.basis[1][1]; +	push_constant.cam_rotation[6] = p_transform.basis[2][1]; +	push_constant.cam_rotation[7] = 0; +	push_constant.cam_rotation[8] = p_transform.basis[0][2]; +	push_constant.cam_rotation[9] = p_transform.basis[1][2]; +	push_constant.cam_rotation[10] = p_transform.basis[2][2]; +	push_constant.cam_rotation[11] = 0; + +	if (rb->sdfgi) { +		GI::SDFGIData sdfgi_data; + +		sdfgi_data.grid_size[0] = rb->sdfgi->cascade_size; +		sdfgi_data.grid_size[1] = rb->sdfgi->cascade_size; +		
sdfgi_data.grid_size[2] = rb->sdfgi->cascade_size; + +		sdfgi_data.max_cascades = rb->sdfgi->cascades.size(); +		sdfgi_data.probe_axis_size = rb->sdfgi->probe_axis_count; +		sdfgi_data.cascade_probe_size[0] = sdfgi_data.probe_axis_size - 1; //float version for performance +		sdfgi_data.cascade_probe_size[1] = sdfgi_data.probe_axis_size - 1; +		sdfgi_data.cascade_probe_size[2] = sdfgi_data.probe_axis_size - 1; + +		float csize = rb->sdfgi->cascade_size; +		sdfgi_data.probe_to_uvw = 1.0 / float(sdfgi_data.cascade_probe_size[0]); +		sdfgi_data.use_occlusion = rb->sdfgi->uses_occlusion; +		//sdfgi_data.energy = rb->sdfgi->energy; + +		sdfgi_data.y_mult = rb->sdfgi->y_mult; + +		float cascade_voxel_size = (csize / sdfgi_data.cascade_probe_size[0]); +		float occlusion_clamp = (cascade_voxel_size - 0.5) / cascade_voxel_size; +		sdfgi_data.occlusion_clamp[0] = occlusion_clamp; +		sdfgi_data.occlusion_clamp[1] = occlusion_clamp; +		sdfgi_data.occlusion_clamp[2] = occlusion_clamp; +		sdfgi_data.normal_bias = (rb->sdfgi->normal_bias / csize) * sdfgi_data.cascade_probe_size[0]; + +		//vec2 tex_pixel_size = 1.0 / vec2(ivec2( (OCT_SIZE+2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE+2) * params.probe_axis_size ) ); +		//vec3 probe_uv_offset = (ivec3(OCT_SIZE+2,OCT_SIZE+2,(OCT_SIZE+2) * params.probe_axis_size)) * tex_pixel_size.xyx; + +		uint32_t oct_size = SDFGI::LIGHTPROBE_OCT_SIZE; + +		sdfgi_data.lightprobe_tex_pixel_size[0] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size * sdfgi_data.probe_axis_size); +		sdfgi_data.lightprobe_tex_pixel_size[1] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size); +		sdfgi_data.lightprobe_tex_pixel_size[2] = 1.0; + +		sdfgi_data.energy = rb->sdfgi->energy; + +		sdfgi_data.lightprobe_uv_offset[0] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[0]; +		sdfgi_data.lightprobe_uv_offset[1] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[1]; +		sdfgi_data.lightprobe_uv_offset[2] = float((oct_size + 2) 
* sdfgi_data.probe_axis_size) * sdfgi_data.lightprobe_tex_pixel_size[0]; + +		sdfgi_data.occlusion_renormalize[0] = 0.5; +		sdfgi_data.occlusion_renormalize[1] = 1.0; +		sdfgi_data.occlusion_renormalize[2] = 1.0 / float(sdfgi_data.max_cascades); + +		int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + +		for (uint32_t i = 0; i < sdfgi_data.max_cascades; i++) { +			GI::SDFGIData::ProbeCascadeData &c = sdfgi_data.cascades[i]; +			Vector3 pos = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[i].position)) * rb->sdfgi->cascades[i].cell_size; +			Vector3 cam_origin = p_transform.origin; +			cam_origin.y *= rb->sdfgi->y_mult; +			pos -= cam_origin; //make pos local to camera, to reduce numerical error +			c.position[0] = pos.x; +			c.position[1] = pos.y; +			c.position[2] = pos.z; +			c.to_probe = 1.0 / (float(rb->sdfgi->cascade_size) * rb->sdfgi->cascades[i].cell_size / float(rb->sdfgi->probe_axis_count - 1)); + +			Vector3i probe_ofs = rb->sdfgi->cascades[i].position / probe_divisor; +			c.probe_world_offset[0] = probe_ofs.x; +			c.probe_world_offset[1] = probe_ofs.y; +			c.probe_world_offset[2] = probe_ofs.z; + +			c.to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; +		} + +		RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data, true); +	} + +	{ +		RID gi_probe_buffer = render_buffers_get_gi_probe_buffer(p_render_buffers); +		GI::GIProbeData gi_probe_data[RenderBuffers::MAX_GIPROBES]; + +		bool giprobes_changed = false; + +		Transform to_camera; +		to_camera.origin = p_transform.origin; //only translation, make local + +		for (int i = 0; i < RenderBuffers::MAX_GIPROBES; i++) { +			RID texture; +			if (i < p_gi_probe_cull_count) { +				GIProbeInstance *gipi = gi_probe_instance_owner.getornull(p_gi_probe_cull_result[i]); + +				if (gipi) { +					texture = gipi->texture; +					GI::GIProbeData &gipd = gi_probe_data[i]; + +					RID base_probe = gipi->probe; + +					Transform 
to_cell = storage->gi_probe_get_to_cell_xform(gipi->probe) * gipi->transform.affine_inverse() * to_camera; + +					gipd.xform[0] = to_cell.basis.elements[0][0]; +					gipd.xform[1] = to_cell.basis.elements[1][0]; +					gipd.xform[2] = to_cell.basis.elements[2][0]; +					gipd.xform[3] = 0; +					gipd.xform[4] = to_cell.basis.elements[0][1]; +					gipd.xform[5] = to_cell.basis.elements[1][1]; +					gipd.xform[6] = to_cell.basis.elements[2][1]; +					gipd.xform[7] = 0; +					gipd.xform[8] = to_cell.basis.elements[0][2]; +					gipd.xform[9] = to_cell.basis.elements[1][2]; +					gipd.xform[10] = to_cell.basis.elements[2][2]; +					gipd.xform[11] = 0; +					gipd.xform[12] = to_cell.origin.x; +					gipd.xform[13] = to_cell.origin.y; +					gipd.xform[14] = to_cell.origin.z; +					gipd.xform[15] = 1; + +					Vector3 bounds = storage->gi_probe_get_octree_size(base_probe); + +					gipd.bounds[0] = bounds.x; +					gipd.bounds[1] = bounds.y; +					gipd.bounds[2] = bounds.z; + +					gipd.dynamic_range = storage->gi_probe_get_dynamic_range(base_probe) * storage->gi_probe_get_energy(base_probe); +					gipd.bias = storage->gi_probe_get_bias(base_probe); +					gipd.normal_bias = storage->gi_probe_get_normal_bias(base_probe); +					gipd.blend_ambient = !storage->gi_probe_is_interior(base_probe); +					gipd.anisotropy_strength = 0; +					gipd.ao = storage->gi_probe_get_ao(base_probe); +					gipd.ao_size = Math::pow(storage->gi_probe_get_ao_size(base_probe), 4.0f); +				} +			} + +			if (texture == RID()) { +				texture = storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); +			} + +			if (texture != rb->giprobe_textures[i]) { +				giprobes_changed = true; +				rb->giprobe_textures[i] = texture; +			} +		} + +		if (giprobes_changed) { +			RD::get_singleton()->free(rb->gi_uniform_set); +			rb->gi_uniform_set = RID(); +		} + +		if (p_gi_probe_cull_count > 0) { +			RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * 
MIN(RenderBuffers::MAX_GIPROBES, p_gi_probe_cull_count), gi_probe_data, true); +		} +	} + +	if (rb->gi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->gi_uniform_set)) { +		Vector<RD::Uniform> uniforms; +		{ +			RD::Uniform u; +			u.binding = 1; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +				if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[j].sdf_tex); +				} else { +					u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 2; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +				if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[j].light_tex); +				} else { +					u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 3; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +				if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[j].light_aniso_0_tex); +				} else { +					u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 4; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { +				if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[j].light_aniso_1_tex); +				} else { +					u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.binding = 5; +			if 
(rb->sdfgi) { +				u.ids.push_back(rb->sdfgi->occlusion_texture); +			} else { +				u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_SAMPLER; +			u.binding = 6; +			u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_SAMPLER; +			u.binding = 7; +			u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +			uniforms.push_back(u); +		} + +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_IMAGE; +			u.binding = 9; +			u.ids.push_back(p_ambient_buffer); +			uniforms.push_back(u); +		} + +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_IMAGE; +			u.binding = 10; +			u.ids.push_back(p_reflection_buffer); +			uniforms.push_back(u); +		} + +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.binding = 11; +			if (rb->sdfgi) { +				u.ids.push_back(rb->sdfgi->lightprobe_texture); +			} else { +				u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE)); +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.binding = 12; +			u.ids.push_back(rb->depth_texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.binding = 13; +			u.ids.push_back(p_normal_roughness_buffer); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.binding = 14; +			RID buffer = p_gi_probe_buffer.is_valid() ? 
p_gi_probe_buffer : storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_BLACK); +			u.ids.push_back(buffer); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +			u.binding = 15; +			u.ids.push_back(gi.sdfgi_ubo); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +			u.binding = 16; +			u.ids.push_back(rb->giprobe_buffer); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.binding = 17; +			for (int i = 0; i < RenderBuffers::MAX_GIPROBES; i++) { +				u.ids.push_back(rb->giprobe_textures[i]); +			} +			uniforms.push_back(u); +		} + +		rb->gi_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi.shader.version_get_shader(gi.shader_version, 0), 0); +	} + +	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); +	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[0]); +	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi_uniform_set, 0); +	RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GI::PushConstant)); +	RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); +	RD::get_singleton()->compute_list_end(); +} +  RID RasterizerSceneRD::sky_create() {  	return sky_owner.make_rid(Sky());  } @@ -1291,6 +2826,31 @@ void RasterizerSceneRD::environment_glow_set_use_bicubic_upscale(bool p_enable)  	glow_bicubic_upscale = p_enable;  } +void RasterizerSceneRD::environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, bool p_enhance_ssr, float p_energy, float p_normal_bias, float p_probe_bias) { +	Environent *env = environment_owner.getornull(p_env); +	ERR_FAIL_COND(!env); + +	
env->sdfgi_enabled = p_enable; +	env->sdfgi_cascades = p_cascades; +	env->sdfgi_min_cell_size = p_min_cell_size; +	env->sdfgi_use_occlusion = p_use_occlusion; +	env->sdfgi_use_multibounce = p_use_multibounce; +	env->sdfgi_read_sky_light = p_read_sky; +	env->sdfgi_enhance_ssr = p_enhance_ssr; +	env->sdfgi_energy = p_energy; +	env->sdfgi_normal_bias = p_normal_bias; +	env->sdfgi_probe_bias = p_probe_bias; +	env->sdfgi_y_scale = p_y_scale; +} + +void RasterizerSceneRD::environment_set_sdfgi_ray_count(RS::EnvironmentSDFGIRayCount p_ray_count) { +	sdfgi_ray_count = p_ray_count; +} + +void RasterizerSceneRD::environment_set_sdfgi_frames_to_converge(RS::EnvironmentSDFGIFramesToConverge p_frames) { +	sdfgi_frames_to_converge = p_frames; +} +  void RasterizerSceneRD::environment_set_ssr(RID p_env, bool p_enable, int p_max_steps, float p_fade_int, float p_fade_out, float p_depth_tolerance) {  	Environent *env = environment_owner.getornull(p_env);  	ERR_FAIL_COND(!env); @@ -1351,6 +2911,11 @@ bool RasterizerSceneRD::environment_is_ssr_enabled(RID p_env) const {  	ERR_FAIL_COND_V(!env, false);  	return env->ssr_enabled;  } +bool RasterizerSceneRD::environment_is_sdfgi_enabled(RID p_env) const { +	Environent *env = environment_owner.getornull(p_env); +	ERR_FAIL_COND_V(!env, false); +	return env->sdfgi_enabled; +}  bool RasterizerSceneRD::is_environment(RID p_env) const {  	return environment_owner.owns(p_env); @@ -2099,6 +3664,13 @@ void RasterizerSceneRD::light_instance_set_transform(RID p_light_instance, const  	light_instance->transform = p_transform;  } +void RasterizerSceneRD::light_instance_set_aabb(RID p_light_instance, const AABB &p_aabb) { +	LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); +	ERR_FAIL_COND(!light_instance); + +	light_instance->aabb = p_aabb; +} +  void RasterizerSceneRD::light_instance_set_shadow_transform(RID p_light_instance, const CameraMatrix &p_projection, const Transform &p_transform, float p_far, float p_split, 
int p_pass, float p_shadow_texel_size, float p_bias_scale, float p_range_begin, const Vector2 &p_uv_scale) {  	LightInstance *light_instance = light_instance_owner.getornull(p_light_instance);  	ERR_FAIL_COND(!light_instance); @@ -2193,23 +3765,9 @@ void RasterizerSceneRD::decal_instance_set_transform(RID p_decal, const Transfor  /////////////////////////////////  RID RasterizerSceneRD::gi_probe_instance_create(RID p_base) { -	//find a free slot -	int index = -1; -	for (int i = 0; i < gi_probe_slots.size(); i++) { -		if (gi_probe_slots[i] == RID()) { -			index = i; -			break; -		} -	} - -	ERR_FAIL_COND_V(index == -1, RID()); -  	GIProbeInstance gi_probe; -	gi_probe.slot = index;  	gi_probe.probe = p_base;  	RID rid = gi_probe_instance_owner.make_rid(gi_probe); -	gi_probe_slots.write[index] = rid; -  	return rid;  } @@ -2240,10 +3798,6 @@ void RasterizerSceneRD::gi_probe_update(RID p_probe, bool p_update_light_instanc  		//need to re-create everything  		if (gi_probe->texture.is_valid()) {  			RD::get_singleton()->free(gi_probe->texture); -			if (gi_probe_use_anisotropy) { -				RD::get_singleton()->free(gi_probe->anisotropy_r16[0]); -				RD::get_singleton()->free(gi_probe->anisotropy_r16[1]); -			}  			RD::get_singleton()->free(gi_probe->write_buffer);  			gi_probe->mipmaps.clear();  		} @@ -2275,47 +3829,18 @@ void RasterizerSceneRD::gi_probe_update(RID p_probe, bool p_update_light_instanc  			RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, levels.size(), 0, 1, false); -			if (gi_probe_use_anisotropy) { -				tf.format = RD::DATA_FORMAT_R16_UINT; -				tf.shareable_formats.push_back(RD::DATA_FORMAT_R16_UINT); -				tf.shareable_formats.push_back(RD::DATA_FORMAT_R5G6B5_UNORM_PACK16); - -				//need to create R16 first, else driver does not like the storage bit for compute.. 
-				gi_probe->anisotropy_r16[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); -				gi_probe->anisotropy_r16[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - -				RD::TextureView tv; -				tv.format_override = RD::DATA_FORMAT_R5G6B5_UNORM_PACK16; -				gi_probe->anisotropy[0] = RD::get_singleton()->texture_create_shared(tv, gi_probe->anisotropy_r16[0]); -				gi_probe->anisotropy[1] = RD::get_singleton()->texture_create_shared(tv, gi_probe->anisotropy_r16[1]); - -				RD::get_singleton()->texture_clear(gi_probe->anisotropy[0], Color(0, 0, 0, 0), 0, levels.size(), 0, 1, false); -				RD::get_singleton()->texture_clear(gi_probe->anisotropy[1], Color(0, 0, 0, 0), 0, levels.size(), 0, 1, false); -			} -  			{  				int total_elements = 0;  				for (int i = 0; i < levels.size(); i++) {  					total_elements += levels[i];  				} -				if (gi_probe_use_anisotropy) { -					total_elements *= 6; -				} -  				gi_probe->write_buffer = RD::get_singleton()->storage_buffer_create(total_elements * 16);  			}  			for (int i = 0; i < levels.size(); i++) {  				GIProbeInstance::Mipmap mipmap;  				mipmap.texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), gi_probe->texture, 0, i, RD::TEXTURE_SLICE_3D); -				if (gi_probe_use_anisotropy) { -					RD::TextureView tv; -					tv.format_override = RD::DATA_FORMAT_R16_UINT; -					mipmap.anisotropy[0] = RD::get_singleton()->texture_create_shared_from_slice(tv, gi_probe->anisotropy[0], 0, i, RD::TEXTURE_SLICE_3D); -					mipmap.anisotropy[1] = RD::get_singleton()->texture_create_shared_from_slice(tv, gi_probe->anisotropy[1], 0, i, RD::TEXTURE_SLICE_3D); -				} -  				mipmap.level = levels.size() - i - 1;  				mipmap.cell_offset = 0;  				for (uint32_t j = 0; j < mipmap.level; j++) { @@ -2383,24 +3908,6 @@ void RasterizerSceneRD::gi_probe_update(RID p_probe, bool p_update_light_instanc  							u.ids.push_back(gi_probe->texture);  							copy_uniforms.push_back(u);  						} - -						if 
(gi_probe_use_anisotropy) { -							{ -								RD::Uniform u; -								u.type = RD::UNIFORM_TYPE_TEXTURE; -								u.binding = 7; -								u.ids.push_back(gi_probe->anisotropy[0]); -								copy_uniforms.push_back(u); -							} -							{ -								RD::Uniform u; -								u.type = RD::UNIFORM_TYPE_TEXTURE; -								u.binding = 8; -								u.ids.push_back(gi_probe->anisotropy[1]); -								copy_uniforms.push_back(u); -							} -						} -  						mipmap.second_bounce_uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_COMPUTE_SECOND_BOUNCE], 0);  					} else {  						mipmap.uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_COMPUTE_MIPMAP], 0); @@ -2415,23 +3922,6 @@ void RasterizerSceneRD::gi_probe_update(RID p_probe, bool p_update_light_instanc  					uniforms.push_back(u);  				} -				if (gi_probe_use_anisotropy) { -					{ -						RD::Uniform u; -						u.type = RD::UNIFORM_TYPE_IMAGE; -						u.binding = 6; -						u.ids.push_back(mipmap.anisotropy[0]); -						uniforms.push_back(u); -					} -					{ -						RD::Uniform u; -						u.type = RD::UNIFORM_TYPE_IMAGE; -						u.binding = 7; -						u.ids.push_back(mipmap.anisotropy[1]); -						uniforms.push_back(u); -					} -				} -  				mipmap.write_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_lighting_shader_version_shaders[GI_PROBE_SHADER_VERSION_WRITE_TEXTURE], 0);  				gi_probe->mipmaps.push_back(mipmap); @@ -2626,22 +4116,6 @@ void RasterizerSceneRD::gi_probe_update(RID p_probe, bool p_update_light_instanc  								u.ids.push_back(gi_probe->mipmaps[dmap.mipmap].texture);  								uniforms.push_back(u);  							} -							if (gi_probe_is_anisotropic()) { -								{ -									RD::Uniform u; -									u.type = RD::UNIFORM_TYPE_IMAGE; -									u.binding = 12; -									u.ids.push_back(gi_probe->mipmaps[dmap.mipmap].anisotropy[0]); -									
uniforms.push_back(u); -								} -								{ -									RD::Uniform u; -									u.type = RD::UNIFORM_TYPE_IMAGE; -									u.binding = 13; -									u.ids.push_back(gi_probe->mipmaps[dmap.mipmap].anisotropy[1]); -									uniforms.push_back(u); -								} -							}  						}  						dmap.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, giprobe_lighting_shader_version_shaders[(write && plot) ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT : write ? GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_WRITE : GI_PROBE_SHADER_VERSION_DYNAMIC_SHRINK_PLOT], 0); @@ -2663,10 +4137,6 @@ void RasterizerSceneRD::gi_probe_update(RID p_probe, bool p_update_light_instanc  	if (gi_probe->has_dynamic_object_data) {  		//if it has dynamic object data, it needs to be cleared  		RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1, true); -		if (gi_probe_is_anisotropic()) { -			RD::get_singleton()->texture_clear(gi_probe->anisotropy[0], Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1, true); -			RD::get_singleton()->texture_clear(gi_probe->anisotropy[1], Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1, true); -		}  	}  	uint32_t light_count = 0; @@ -2733,7 +4203,7 @@ void RasterizerSceneRD::gi_probe_update(RID p_probe, bool p_update_light_instanc  			push_constant.propagation = storage->gi_probe_get_propagation(gi_probe->probe);  			push_constant.dynamic_range = storage->gi_probe_get_dynamic_range(gi_probe->probe);  			push_constant.light_count = light_count; -			push_constant.aniso_strength = storage->gi_probe_get_anisotropy_strength(gi_probe->probe); +			push_constant.aniso_strength = 0;  			/*		print_line("probe update to version " + itos(gi_probe->last_probe_version));  			print_line("propagation " + rtos(push_constant.propagation)); @@ -3067,23 +4537,6 @@ void RasterizerSceneRD::_debug_giprobe(RID p_gi_probe, RD::DrawListID p_draw_lis  		uniforms.push_back(u);  	} -	if (gi_probe_use_anisotropy) { -		{ -			
RD::Uniform u; -			u.type = RD::UNIFORM_TYPE_TEXTURE; -			u.binding = 4; -			u.ids.push_back(gi_probe->anisotropy[0]); -			uniforms.push_back(u); -		} -		{ -			RD::Uniform u; -			u.type = RD::UNIFORM_TYPE_TEXTURE; -			u.binding = 5; -			u.ids.push_back(gi_probe->anisotropy[1]); -			uniforms.push_back(u); -		} -	} -  	int cell_count;  	if (!p_emission && p_lighting && gi_probe->has_dynamic_object_data) {  		cell_count = push_constant.bounds[0] * push_constant.bounds[1] * push_constant.bounds[2]; @@ -3098,12 +4551,140 @@ void RasterizerSceneRD::_debug_giprobe(RID p_gi_probe, RD::DrawListID p_draw_lis  	RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, 36);  } -const Vector<RID> &RasterizerSceneRD::gi_probe_get_slots() const { -	return gi_probe_slots; -} +void RasterizerSceneRD::_debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform) { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND(!rb); + +	if (!rb->sdfgi) { +		return; //nothing to debug +	} + +	SDGIShader::DebugProbesPushConstant push_constant; + +	for (int i = 0; i < 4; i++) { +		for (int j = 0; j < 4; j++) { +			push_constant.projection[i * 4 + j] = p_camera_with_transform.matrix[i][j]; +		} +	} + +	//gen spheres from strips +	uint32_t band_points = 16; +	push_constant.band_power = 4; +	push_constant.sections_in_band = ((band_points / 2) - 1); +	push_constant.band_mask = band_points - 2; +	push_constant.section_arc = (Math_PI * 2.0) / float(push_constant.sections_in_band); +	push_constant.y_mult = rb->sdfgi->y_mult; + +	uint32_t total_points = push_constant.sections_in_band * band_points; +	uint32_t total_probes = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + +	push_constant.grid_size[0] = rb->sdfgi->cascade_size; +	push_constant.grid_size[1] = rb->sdfgi->cascade_size; +	push_constant.grid_size[2] = rb->sdfgi->cascade_size; +	
push_constant.cascade = 0; + +	push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + +	if (!rb->sdfgi->debug_probes_uniform_set.is_valid() || !RD::get_singleton()->uniform_set_is_valid(rb->sdfgi->debug_probes_uniform_set)) { +		Vector<RD::Uniform> uniforms; +		{ +			RD::Uniform u; +			u.binding = 1; +			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +			u.ids.push_back(rb->sdfgi->cascades_ubo); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 2; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.ids.push_back(rb->sdfgi->lightprobe_texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 3; +			u.type = RD::UNIFORM_TYPE_SAMPLER; +			u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 4; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.ids.push_back(rb->sdfgi->occlusion_texture); +			uniforms.push_back(u); +		} + +		rb->sdfgi->debug_probes_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.debug_probes.version_get_shader(sdfgi_shader.debug_probes_shader, 0), 0); +	} + +	RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, sdfgi_shader.debug_probes_pipeline[SDGIShader::PROBE_DEBUG_PROBES].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); +	RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, rb->sdfgi->debug_probes_uniform_set, 0); +	RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(SDGIShader::DebugProbesPushConstant)); +	RD::get_singleton()->draw_list_draw(p_draw_list, false, total_probes, total_points); + +	if (sdfgi_debug_probe_dir != Vector3()) { +		print_line("CLICK DEBUG ME?"); +		uint32_t cascade = 0; +		Vector3 offset = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[cascade].position)) * 
rb->sdfgi->cascades[cascade].cell_size * Vector3(1.0, 1.0 / rb->sdfgi->y_mult, 1.0); +		Vector3 probe_size = rb->sdfgi->cascades[cascade].cell_size * (rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR) * Vector3(1.0, 1.0 / rb->sdfgi->y_mult, 1.0); +		Vector3 ray_from = sdfgi_debug_probe_pos; +		Vector3 ray_to = sdfgi_debug_probe_pos + sdfgi_debug_probe_dir * rb->sdfgi->cascades[cascade].cell_size * Math::sqrt(3.0) * rb->sdfgi->cascade_size; +		float sphere_radius = 0.2; +		float closest_dist = 1e20; +		sdfgi_debug_probe_enabled = false; + +		Vector3i probe_from = rb->sdfgi->cascades[cascade].position / (rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR); +		for (int i = 0; i < (SDFGI::PROBE_DIVISOR + 1); i++) { +			for (int j = 0; j < (SDFGI::PROBE_DIVISOR + 1); j++) { +				for (int k = 0; k < (SDFGI::PROBE_DIVISOR + 1); k++) { +					Vector3 pos = offset + probe_size * Vector3(i, j, k); +					Vector3 res; +					if (Geometry3D::segment_intersects_sphere(ray_from, ray_to, pos, sphere_radius, &res)) { +						float d = ray_from.distance_to(res); +						if (d < closest_dist) { +							closest_dist = d; +							sdfgi_debug_probe_enabled = true; +							sdfgi_debug_probe_index = probe_from + Vector3i(i, j, k); +						} +					} +				} +			} +		} + +		if (sdfgi_debug_probe_enabled) { +			print_line("found: " + sdfgi_debug_probe_index); +		} else { +			print_line("no found"); +		} +		sdfgi_debug_probe_dir = Vector3(); +	} + +	if (sdfgi_debug_probe_enabled) { +		uint32_t cascade = 0; +		uint32_t probe_cells = (rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR); +		Vector3i probe_from = rb->sdfgi->cascades[cascade].position / probe_cells; +		Vector3i ofs = sdfgi_debug_probe_index - probe_from; +		if (ofs.x < 0 || ofs.y < 0 || ofs.z < 0) { +			return; +		} +		if (ofs.x > SDFGI::PROBE_DIVISOR || ofs.y > SDFGI::PROBE_DIVISOR || ofs.z > SDFGI::PROBE_DIVISOR) { +			return; +		} -RasterizerSceneRD::GIProbeQuality RasterizerSceneRD::gi_probe_get_quality() const { -	return gi_probe_quality; +		
uint32_t mult = (SDFGI::PROBE_DIVISOR + 1); +		uint32_t index = ofs.z * mult * mult + ofs.y * mult + ofs.x; + +		push_constant.probe_debug_index = index; + +		uint32_t cell_count = probe_cells * 2 * probe_cells * 2 * probe_cells * 2; + +		RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, sdfgi_shader.debug_probes_pipeline[SDGIShader::PROBE_DEBUG_VISIBILITY].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); +		RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, rb->sdfgi->debug_probes_uniform_set, 0); +		RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(SDGIShader::DebugProbesPushConstant)); +		RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, total_points); +	}  }  //////////////////////////////// @@ -3273,7 +4854,7 @@ void RasterizerSceneRD::_process_sss(RID p_render_buffers, const CameraMatrix &p  	storage->get_effects()->sub_surface_scattering(rb->texture, rb->blur[0].mipmaps[0].texture, rb->depth_texture, p_camera, Size2i(rb->width, rb->height), sss_scale, sss_depth_scale, sss_quality);  } -void RasterizerSceneRD::_process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_roughness_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive) { +void RasterizerSceneRD::_process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive) {  	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers);  	ERR_FAIL_COND(!rb); @@ -3322,7 +4903,7 @@ void RasterizerSceneRD::_process_ssr(RID p_render_buffers, RID p_dest_framebuffe  		_render_buffers_uniform_set_changed(p_render_buffers);  	} -	storage->get_effects()->screen_space_reflection(rb->texture, 
p_normal_buffer, ssr_roughness_quality, p_roughness_buffer, rb->ssr.blur_radius[0], rb->ssr.blur_radius[1], p_metallic, p_metallic_mask, rb->depth_texture, rb->ssr.depth_scaled, rb->ssr.normal_scaled, rb->blur[0].mipmaps[1].texture, rb->blur[1].mipmaps[0].texture, Size2i(rb->width / 2, rb->height / 2), env->ssr_max_steps, env->ssr_fade_in, env->ssr_fade_out, env->ssr_depth_tolerance, p_projection); +	storage->get_effects()->screen_space_reflection(rb->texture, p_normal_buffer, ssr_roughness_quality, rb->ssr.blur_radius[0], rb->ssr.blur_radius[1], p_metallic, p_metallic_mask, rb->depth_texture, rb->ssr.depth_scaled, rb->ssr.normal_scaled, rb->blur[0].mipmaps[1].texture, rb->blur[1].mipmaps[0].texture, Size2i(rb->width / 2, rb->height / 2), env->ssr_max_steps, env->ssr_fade_in, env->ssr_fade_out, env->ssr_depth_tolerance, p_projection);  	storage->get_effects()->merge_specular(p_dest_framebuffer, p_specular_buffer, p_use_additive ? RID() : rb->texture, rb->blur[0].mipmaps[1].texture);  } @@ -3563,17 +5144,165 @@ void RasterizerSceneRD::_render_buffers_debug_draw(RID p_render_buffers, RID p_s  		effects->copy_to_fb_rect(ao_buf, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, true);  	} -	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_ROUGHNESS_LIMITER && _render_buffers_get_roughness_texture(p_render_buffers).is_valid()) { +	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER && _render_buffers_get_normal_texture(p_render_buffers).is_valid()) {  		Size2 rtsize = storage->render_target_get_size(rb->render_target); -		effects->copy_to_fb_rect(_render_buffers_get_roughness_texture(p_render_buffers), storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, true); +		effects->copy_to_fb_rect(_render_buffers_get_normal_texture(p_render_buffers), storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false);  	} -	if (debug_draw == 
RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER && _render_buffers_get_normal_texture(p_render_buffers).is_valid()) { +	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_GI_BUFFER && _render_buffers_get_ambient_texture(p_render_buffers).is_valid()) {  		Size2 rtsize = storage->render_target_get_size(rb->render_target); -		effects->copy_to_fb_rect(_render_buffers_get_normal_texture(p_render_buffers), storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false); +		RID ambient_texture = _render_buffers_get_ambient_texture(p_render_buffers); +		RID reflection_texture = _render_buffers_get_reflection_texture(p_render_buffers); +		effects->copy_to_fb_rect(ambient_texture, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false, false, true, reflection_texture);  	}  } +void RasterizerSceneRD::_sdfgi_debug_draw(RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform) { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND(!rb); + +	if (!rb->sdfgi) { +		return; //eh +	} + +	if (!rb->sdfgi->debug_uniform_set.is_valid() || !RD::get_singleton()->uniform_set_is_valid(rb->sdfgi->debug_uniform_set)) { +		Vector<RD::Uniform> uniforms; +		{ +			RD::Uniform u; +			u.binding = 1; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { +				if (i < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[i].sdf_tex); +				} else { +					u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 2; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { +				if (i < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[i].light_tex); +				} else { +					
u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 3; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { +				if (i < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[i].light_aniso_0_tex); +				} else { +					u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 4; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { +				if (i < rb->sdfgi->cascades.size()) { +					u.ids.push_back(rb->sdfgi->cascades[i].light_aniso_1_tex); +				} else { +					u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				} +			} +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 5; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.ids.push_back(rb->sdfgi->occlusion_texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 8; +			u.type = RD::UNIFORM_TYPE_SAMPLER; +			u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 9; +			u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; +			u.ids.push_back(rb->sdfgi->cascades_ubo); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 10; +			u.type = RD::UNIFORM_TYPE_IMAGE; +			u.ids.push_back(rb->texture); +			uniforms.push_back(u); +		} +		{ +			RD::Uniform u; +			u.binding = 11; +			u.type = RD::UNIFORM_TYPE_TEXTURE; +			u.ids.push_back(rb->sdfgi->lightprobe_texture); +			uniforms.push_back(u); +		} +		rb->sdfgi->debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.debug_shader_version, 0); +	} + +	
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); +	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.debug_pipeline); +	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->debug_uniform_set, 0); + +	SDGIShader::DebugPushConstant push_constant; +	push_constant.grid_size[0] = rb->sdfgi->cascade_size; +	push_constant.grid_size[1] = rb->sdfgi->cascade_size; +	push_constant.grid_size[2] = rb->sdfgi->cascade_size; +	push_constant.max_cascades = rb->sdfgi->cascades.size(); +	push_constant.screen_size[0] = rb->width; +	push_constant.screen_size[1] = rb->height; +	push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; +	push_constant.use_occlusion = rb->sdfgi->uses_occlusion; +	push_constant.y_mult = rb->sdfgi->y_mult; + +	Vector2 vp_half = p_projection.get_viewport_half_extents(); +	push_constant.cam_extent[0] = vp_half.x; +	push_constant.cam_extent[1] = vp_half.y; +	push_constant.cam_extent[2] = -p_projection.get_z_near(); + +	push_constant.cam_transform[0] = p_transform.basis.elements[0][0]; +	push_constant.cam_transform[1] = p_transform.basis.elements[1][0]; +	push_constant.cam_transform[2] = p_transform.basis.elements[2][0]; +	push_constant.cam_transform[3] = 0; +	push_constant.cam_transform[4] = p_transform.basis.elements[0][1]; +	push_constant.cam_transform[5] = p_transform.basis.elements[1][1]; +	push_constant.cam_transform[6] = p_transform.basis.elements[2][1]; +	push_constant.cam_transform[7] = 0; +	push_constant.cam_transform[8] = p_transform.basis.elements[0][2]; +	push_constant.cam_transform[9] = p_transform.basis.elements[1][2]; +	push_constant.cam_transform[10] = p_transform.basis.elements[2][2]; +	push_constant.cam_transform[11] = 0; +	push_constant.cam_transform[12] = p_transform.origin.x; +	push_constant.cam_transform[13] = p_transform.origin.y; +	push_constant.cam_transform[14] = p_transform.origin.z; +	push_constant.cam_transform[15] = 1; + +	
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DebugPushConstant)); + +	RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); +	RD::get_singleton()->compute_list_end(); + +	Size2 rtsize = storage->render_target_get_size(rb->render_target); +	storage->get_effects()->copy_to_fb_rect(rb->texture, storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), true); +} +  RID RasterizerSceneRD::render_buffers_get_back_buffer_texture(RID p_render_buffers) {  	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers);  	ERR_FAIL_COND_V(!rb, RID()); @@ -3590,6 +5319,113 @@ RID RasterizerSceneRD::render_buffers_get_ao_texture(RID p_render_buffers) {  	return rb->ssao.ao_full.is_valid() ? rb->ssao.ao_full : rb->ssao.ao[0];  } +RID RasterizerSceneRD::render_buffers_get_gi_probe_buffer(RID p_render_buffers) { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, RID()); +	if (rb->giprobe_buffer.is_null()) { +		rb->giprobe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GI::GIProbeData) * RenderBuffers::MAX_GIPROBES); +	} +	return rb->giprobe_buffer; +} + +RID RasterizerSceneRD::render_buffers_get_default_gi_probe_buffer() { +	return default_giprobe_buffer; +} + +uint32_t RasterizerSceneRD::render_buffers_get_sdfgi_cascade_count(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, 0); +	ERR_FAIL_COND_V(!rb->sdfgi, 0); + +	return rb->sdfgi->cascades.size(); +} +bool RasterizerSceneRD::render_buffers_is_sdfgi_enabled(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, false); + +	return rb->sdfgi != nullptr; +} +RID RasterizerSceneRD::render_buffers_get_sdfgi_irradiance_probes(RID p_render_buffers) const { +	const RenderBuffers *rb = 
render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, RID()); +	ERR_FAIL_COND_V(!rb->sdfgi, RID()); + +	return rb->sdfgi->lightprobe_texture; +} + +Vector3 RasterizerSceneRD::render_buffers_get_sdfgi_cascade_offset(RID p_render_buffers, uint32_t p_cascade) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, Vector3()); +	ERR_FAIL_COND_V(!rb->sdfgi, Vector3()); +	ERR_FAIL_UNSIGNED_INDEX_V(p_cascade, rb->sdfgi->cascades.size(), Vector3()); + +	return Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + rb->sdfgi->cascades[p_cascade].position)) * rb->sdfgi->cascades[p_cascade].cell_size; +} + +Vector3i RasterizerSceneRD::render_buffers_get_sdfgi_cascade_probe_offset(RID p_render_buffers, uint32_t p_cascade) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, Vector3i()); +	ERR_FAIL_COND_V(!rb->sdfgi, Vector3i()); +	ERR_FAIL_UNSIGNED_INDEX_V(p_cascade, rb->sdfgi->cascades.size(), Vector3i()); +	int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; + +	return rb->sdfgi->cascades[p_cascade].position / probe_divisor; +} + +float RasterizerSceneRD::render_buffers_get_sdfgi_normal_bias(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, 0); +	ERR_FAIL_COND_V(!rb->sdfgi, 0); + +	return rb->sdfgi->normal_bias; +} +float RasterizerSceneRD::render_buffers_get_sdfgi_cascade_probe_size(RID p_render_buffers, uint32_t p_cascade) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, 0); +	ERR_FAIL_COND_V(!rb->sdfgi, 0); +	ERR_FAIL_UNSIGNED_INDEX_V(p_cascade, rb->sdfgi->cascades.size(), 0); + +	return float(rb->sdfgi->cascade_size) * rb->sdfgi->cascades[p_cascade].cell_size / float(rb->sdfgi->probe_axis_count - 1); +} +uint32_t 
RasterizerSceneRD::render_buffers_get_sdfgi_cascade_probe_count(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, 0); +	ERR_FAIL_COND_V(!rb->sdfgi, 0); + +	return rb->sdfgi->probe_axis_count; +} + +uint32_t RasterizerSceneRD::render_buffers_get_sdfgi_cascade_size(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, 0); +	ERR_FAIL_COND_V(!rb->sdfgi, 0); + +	return rb->sdfgi->cascade_size; +} + +bool RasterizerSceneRD::render_buffers_is_sdfgi_using_occlusion(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, false); +	ERR_FAIL_COND_V(!rb->sdfgi, false); + +	return rb->sdfgi->uses_occlusion; +} + +float RasterizerSceneRD::render_buffers_get_sdfgi_energy(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, 0); +	ERR_FAIL_COND_V(!rb->sdfgi, false); + +	return rb->sdfgi->energy; +} +RID RasterizerSceneRD::render_buffers_get_sdfgi_occlusion_texture(RID p_render_buffers) const { +	const RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND_V(!rb, RID()); +	ERR_FAIL_COND_V(!rb->sdfgi, RID()); + +	return rb->sdfgi->occlusion_texture; +} +  void RasterizerSceneRD::render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_width, int p_height, RS::ViewportMSAA p_msaa, RenderingServer::ViewportScreenSpaceAA p_screen_space_aa) {  	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers);  	rb->width = p_width; @@ -3606,7 +5442,7 @@ void RasterizerSceneRD::render_buffers_configure(RID p_render_buffers, RID p_ren  		tf.height = rb->height;  		tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT;  		if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { -			tf.usage_bits |= 
RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; +			tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT;  		} else {  			tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;  		} @@ -3616,12 +5452,20 @@ void RasterizerSceneRD::render_buffers_configure(RID p_render_buffers, RID p_ren  	{  		RD::TextureFormat tf; -		tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D24_UNORM_S8_UINT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D24_UNORM_S8_UINT : RD::DATA_FORMAT_D32_SFLOAT_S8_UINT; +		if (rb->msaa == RS::VIEWPORT_MSAA_DISABLED) { +			tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D24_UNORM_S8_UINT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D24_UNORM_S8_UINT : RD::DATA_FORMAT_D32_SFLOAT_S8_UINT; +		} else { +			tf.format = RD::DATA_FORMAT_R32_SFLOAT; +		} +  		tf.width = p_width;  		tf.height = p_height; -		tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; +		tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; +  		if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { -			tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; +			tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; +		} else { +			tf.usage_bits |= RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;  		}  		rb->depth_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); @@ -3748,6 +5592,14 @@ void RasterizerSceneRD::render_scene(RID p_render_buffers, const Transform &p_ca  		clear_color = storage->get_default_clear_color();  	} +	//assign render indices to giprobes +	for (int i = 0; i < p_gi_probe_cull_count; i++) { +		GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probe_cull_result[i]); +		if (giprobe_inst) { +			giprobe_inst->render_index = i; +		} +	} +  	_render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_cull_result, 
p_cull_count, p_light_cull_result, p_light_cull_count, p_reflection_probe_cull_result, p_reflection_probe_cull_count, p_gi_probe_cull_result, p_gi_probe_cull_count, p_decal_cull_result, p_decal_cull_count, p_lightmap_cull_result, p_lightmap_cull_count, p_environment, p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color);  	if (p_render_buffers.is_valid()) { @@ -3755,6 +5607,9 @@ void RasterizerSceneRD::render_scene(RID p_render_buffers, const Transform &p_ca  		_render_buffers_post_process_and_tonemap(p_render_buffers, p_environment, p_camera_effects, p_cam_projection);  		_render_buffers_debug_draw(p_render_buffers, p_shadow_atlas); +		if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SDFGI) { +			_sdfgi_debug_draw(p_render_buffers, p_cam_projection, p_cam_transform); +		}  	}  } @@ -3938,11 +5793,455 @@ void RasterizerSceneRD::render_material(const Transform &p_cam_transform, const  	_render_material(p_cam_transform, p_cam_projection, p_cam_ortogonal, p_cull_result, p_cull_count, p_framebuffer, p_region);  } +void RasterizerSceneRD::render_sdfgi(RID p_render_buffers, int p_region, InstanceBase **p_cull_result, int p_cull_count) { +	//print_line("rendering region " + itos(p_region)); +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND(!rb); +	ERR_FAIL_COND(!rb->sdfgi); +	AABB bounds; +	Vector3i from; +	Vector3i size; + +	int cascade_prev = _sdfgi_get_pending_region_data(p_render_buffers, p_region - 1, from, size, bounds); +	int cascade_next = _sdfgi_get_pending_region_data(p_render_buffers, p_region + 1, from, size, bounds); +	int cascade = _sdfgi_get_pending_region_data(p_render_buffers, p_region, from, size, bounds); +	ERR_FAIL_COND(cascade < 0); + +	if (cascade_prev != cascade) { +		//initialize render +		RD::get_singleton()->texture_clear(rb->sdfgi->render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1, true); +		RD::get_singleton()->texture_clear(rb->sdfgi->render_emission, Color(0, 
0, 0, 0), 0, 1, 0, 1, true); +		RD::get_singleton()->texture_clear(rb->sdfgi->render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1, true); +		RD::get_singleton()->texture_clear(rb->sdfgi->render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1, true); +	} + +	//print_line("rendering cascade " + itos(p_region) + " objects: " + itos(p_cull_count) + " bounds: " + bounds + " from: " + from + " size: " + size + " cell size: " + rtos(rb->sdfgi->cascades[cascade].cell_size)); +	_render_sdfgi(p_render_buffers, from, size, bounds, p_cull_result, p_cull_count, rb->sdfgi->render_albedo, rb->sdfgi->render_emission, rb->sdfgi->render_emission_aniso, rb->sdfgi->render_geom_facing); + +	if (cascade_next != cascade) { +		RENDER_TIMESTAMP(">SDFGI Update SDF"); +		//done rendering! must update SDF +		//clear dispatch indirect data + +		SDGIShader::PreprocessPushConstant push_constant; +		zeromem(&push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + +		RENDER_TIMESTAMP("Scroll SDF"); + +		//scroll +		if (rb->sdfgi->cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { +			//for scroll +			Vector3i dirty = rb->sdfgi->cascades[cascade].dirty_regions; +			push_constant.scroll[0] = dirty.x; +			push_constant.scroll[1] = dirty.y; +			push_constant.scroll[2] = dirty.z; +		} else { +			//for no scroll +			push_constant.scroll[0] = 0; +			push_constant.scroll[1] = 0; +			push_constant.scroll[2] = 0; +		} +		push_constant.grid_size = rb->sdfgi->cascade_size; +		push_constant.cascade = cascade; + +		RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); +		if (rb->sdfgi->cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { +			//must pre scroll existing data because not all is dirty +			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_SCROLL]); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].scroll_uniform_set, 0); + +			
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +			RD::get_singleton()->compute_list_dispatch_indirect(compute_list, rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0); +			// no barrier do all together + +			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_SCROLL_OCCLUSION]); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].scroll_occlusion_uniform_set, 0); + +			Vector3i dirty = rb->sdfgi->cascades[cascade].dirty_regions; +			Vector3i groups; +			groups.x = rb->sdfgi->cascade_size - ABS(dirty.x); +			groups.y = rb->sdfgi->cascade_size - ABS(dirty.y); +			groups.z = rb->sdfgi->cascade_size - ABS(dirty.z); + +			RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +			RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z, 4, 4, 4); + +			//no barrier, continue together + +			{ +				//scroll probes and their history also + +				SDGIShader::IntegratePushConstant ipush_constant; +				ipush_constant.grid_size[1] = rb->sdfgi->cascade_size; +				ipush_constant.grid_size[2] = rb->sdfgi->cascade_size; +				ipush_constant.grid_size[0] = rb->sdfgi->cascade_size; +				ipush_constant.max_cascades = rb->sdfgi->cascades.size(); +				ipush_constant.probe_axis_size = rb->sdfgi->probe_axis_count; +				ipush_constant.history_index = 0; +				ipush_constant.history_size = rb->sdfgi->history_size; +				ipush_constant.ray_count = 0; +				ipush_constant.ray_bias = 0; +				ipush_constant.sky_mode = 0; +				ipush_constant.sky_energy = 0; +				ipush_constant.sky_color[0] = 0; +				ipush_constant.sky_color[1] = 0; +				ipush_constant.sky_color[2] = 0; +				ipush_constant.y_mult = rb->sdfgi->y_mult; + +				ipush_constant.image_size[0] = rb->sdfgi->probe_axis_count * 
rb->sdfgi->probe_axis_count; +				ipush_constant.image_size[1] = rb->sdfgi->probe_axis_count; +				ipush_constant.image_size[1] = rb->sdfgi->probe_axis_count; + +				int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; +				ipush_constant.cascade = cascade; +				ipush_constant.world_offset[0] = rb->sdfgi->cascades[cascade].position.x / probe_divisor; +				ipush_constant.world_offset[1] = rb->sdfgi->cascades[cascade].position.y / probe_divisor; +				ipush_constant.world_offset[2] = rb->sdfgi->cascades[cascade].position.z / probe_divisor; + +				ipush_constant.scroll[0] = dirty.x / probe_divisor; +				ipush_constant.scroll[1] = dirty.y / probe_divisor; +				ipush_constant.scroll[2] = dirty.z / probe_divisor; + +				RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_SCROLL]); +				RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); +				RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); +				RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); +				RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + +				RD::get_singleton()->compute_list_add_barrier(compute_list); + +				RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_SCROLL_STORE]); +				RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); +				RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); +				RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, 
sizeof(SDGIShader::IntegratePushConstant)); +				RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); +			} + +			//ok finally barrier +			RD::get_singleton()->compute_list_add_barrier(compute_list); +		} + +		//clear dispatch indirect data +		uint32_t dispatch_indirct_data[4] = { 0, 0, 0, 0 }; +		RD::get_singleton()->buffer_update(rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data, true); + +		bool half_size = true; //much faster, very little differnce +		static const int optimized_jf_group_size = 8; + +		if (half_size) { +			push_constant.grid_size >>= 1; + +			uint32_t cascade_half_size = rb->sdfgi->cascade_size >> 1; +			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF]); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_half_uniform_set, 0); +			RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +			RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); +			RD::get_singleton()->compute_list_add_barrier(compute_list); + +			//must start with regular jumpflood + +			push_constant.half_size = true; +			{ +				RENDER_TIMESTAMP("SDFGI Jump Flood (Half Size)"); + +				uint32_t s = cascade_half_size; + +				RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD]); + +				int jf_us = 0; +				//start with regular jump flood for very coarse reads, as this is impossible to optimize +				while (s > 1) { +					s /= 2; +					push_constant.step_size = s; +					RD::get_singleton()->compute_list_bind_uniform_set(compute_list, 
rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); +					RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +					RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); +					RD::get_singleton()->compute_list_add_barrier(compute_list); +					jf_us = jf_us == 0 ? 1 : 0; + +					if (cascade_half_size / (s / 2) >= optimized_jf_group_size) { +						break; +					} +				} + +				RENDER_TIMESTAMP("SDFGI Jump Flood Optimized (Half Size)"); + +				//continue with optimized jump flood for smaller reads +				RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); +				while (s > 1) { +					s /= 2; +					push_constant.step_size = s; +					RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); +					RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +					RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); +					RD::get_singleton()->compute_list_add_barrier(compute_list); +					jf_us = jf_us == 0 ? 
1 : 0; +				} +			} + +			// restore grid size for last passes +			push_constant.grid_size = rb->sdfgi->cascade_size; + +			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE]); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_upscale_uniform_set, 0); +			RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +			RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); +			RD::get_singleton()->compute_list_add_barrier(compute_list); + +			//run one pass of fullsize jumpflood to fix up half size arctifacts + +			push_constant.half_size = false; +			push_constant.step_size = 1; +			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[rb->sdfgi->upscale_jfa_uniform_set_index], 0); +			RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +			RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); +			RD::get_singleton()->compute_list_add_barrier(compute_list); + +		} else { +			//full size jumpflood +			RENDER_TIMESTAMP("SDFGI Jump Flood"); + +			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE]); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_uniform_set, 0); +			
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +			RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + +			RD::get_singleton()->compute_list_add_barrier(compute_list); + +			push_constant.half_size = false; +			{ +				uint32_t s = rb->sdfgi->cascade_size; + +				RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD]); + +				int jf_us = 0; +				//start with regular jump flood for very coarse reads, as this is impossible to optimize +				while (s > 1) { +					s /= 2; +					push_constant.step_size = s; +					RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); +					RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +					RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); +					RD::get_singleton()->compute_list_add_barrier(compute_list); +					jf_us = jf_us == 0 ? 
1 : 0; + +					if (rb->sdfgi->cascade_size / (s / 2) >= optimized_jf_group_size) { +						break; +					} +				} + +				RENDER_TIMESTAMP("SDFGI Jump Flood Optimized"); + +				//continue with optimized jump flood for smaller reads +				RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); +				while (s > 1) { +					s /= 2; +					push_constant.step_size = s; +					RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); +					RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +					RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); +					RD::get_singleton()->compute_list_add_barrier(compute_list); +					jf_us = jf_us == 0 ? 
1 : 0; +				} +			} +		} + +		RENDER_TIMESTAMP("SDFGI Occlusion"); + +		// occlusion +		{ +			uint32_t probe_size = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; +			Vector3i probe_global_pos = rb->sdfgi->cascades[cascade].position / probe_size; + +			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_OCCLUSION]); +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->occlusion_uniform_set, 0); +			for (int i = 0; i < 8; i++) { +				//dispatch all at once for performance +				Vector3i offset(i & 1, (i >> 1) & 1, (i >> 2) & 1); + +				if ((probe_global_pos.x & 1) != 0) { +					offset.x = (offset.x + 1) & 1; +				} +				if ((probe_global_pos.y & 1) != 0) { +					offset.y = (offset.y + 1) & 1; +				} +				if ((probe_global_pos.z & 1) != 0) { +					offset.z = (offset.z + 1) & 1; +				} +				push_constant.probe_offset[0] = offset.x; +				push_constant.probe_offset[1] = offset.y; +				push_constant.probe_offset[2] = offset.z; +				push_constant.occlusion_index = i; +				RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); + +				Vector3i groups = Vector3i(probe_size + 1, probe_size + 1, probe_size + 1) - offset; //if offseted, its one less probe per axis to compute +				RD::get_singleton()->compute_list_dispatch(compute_list, groups.x, groups.y, groups.z); +			} +			RD::get_singleton()->compute_list_add_barrier(compute_list); +		} + +		RENDER_TIMESTAMP("SDFGI Store"); + +		// store +		RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_STORE]); +		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].sdf_store_uniform_set, 0); +		RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); +		
RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + +		RD::get_singleton()->compute_list_end(); + +		//clear these textures, as they will have previous garbage on next draw +		RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); +		RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); +		RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); + +#if 0 +		Vector<uint8_t> data = RD::get_singleton()->texture_get_data(rb->sdfgi->cascades[cascade].sdf, 0); +		Ref<Image> img; +		img.instance(); +		for (uint32_t i = 0; i < rb->sdfgi->cascade_size; i++) { +			Vector<uint8_t> subarr = data.subarray(128 * 128 * i, 128 * 128 * (i + 1) - 1); +			img->create(rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, false, Image::FORMAT_L8, subarr); +			img->save_png("res://cascade_sdf_" + itos(cascade) + "_" + itos(i) + ".png"); +		} + +		//finalize render and update sdf +#endif + +#if 0 +		Vector<uint8_t> data = RD::get_singleton()->texture_get_data(rb->sdfgi->render_albedo, 0); +		Ref<Image> img; +		img.instance(); +		for (uint32_t i = 0; i < rb->sdfgi->cascade_size; i++) { +			Vector<uint8_t> subarr = data.subarray(128 * 128 * i * 2, 128 * 128 * (i + 1) * 2 - 1); +			img->create(rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, false, Image::FORMAT_RGB565, subarr); +			img->convert(Image::FORMAT_RGBA8); +			img->save_png("res://cascade_" + itos(cascade) + "_" + itos(i) + ".png"); +		} + +		//finalize render and update sdf +#endif + +		RENDER_TIMESTAMP("<SDFGI Update SDF"); +	} +} + +void RasterizerSceneRD::render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const RID **p_positional_light_cull_result, const uint32_t 
*p_positional_light_cull_count) { +	RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); +	ERR_FAIL_COND(!rb); +	ERR_FAIL_COND(!rb->sdfgi); + +	ERR_FAIL_COND(p_positional_light_cull_count == 0); + +	_sdfgi_update_cascades(p_render_buffers); //need cascades updated for this + +	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + +	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_STATIC]); + +	SDGIShader::DirectLightPushConstant dl_push_constant; + +	dl_push_constant.grid_size[0] = rb->sdfgi->cascade_size; +	dl_push_constant.grid_size[1] = rb->sdfgi->cascade_size; +	dl_push_constant.grid_size[2] = rb->sdfgi->cascade_size; +	dl_push_constant.max_cascades = rb->sdfgi->cascades.size(); +	dl_push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; +	dl_push_constant.multibounce = false; // this is static light, do not multibounce yet +	dl_push_constant.y_mult = rb->sdfgi->y_mult; + +	//all must be processed +	dl_push_constant.process_offset = 0; +	dl_push_constant.process_increment = 1; + +	SDGIShader::Light lights[SDFGI::MAX_STATIC_LIGHTS]; + +	for (uint32_t i = 0; i < p_cascade_count; i++) { +		ERR_CONTINUE(p_cascade_indices[i] >= rb->sdfgi->cascades.size()); + +		SDFGI::Cascade &cc = rb->sdfgi->cascades[p_cascade_indices[i]]; + +		{ //fill light buffer + +			AABB cascade_aabb; +			cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cc.position)) * cc.cell_size; +			cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cc.cell_size; + +			int idx = 0; + +			for (uint32_t j = 0; j < p_positional_light_cull_count[i]; j++) { +				if (idx == SDFGI::MAX_STATIC_LIGHTS) { +					break; +				} + +				LightInstance *li = light_instance_owner.getornull(p_positional_light_cull_result[i][j]); +				ERR_CONTINUE(!li); + +				uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); 
+				if (p_cascade_indices[i] > max_sdfgi_cascade) { +					continue; +				} + +				if (!cascade_aabb.intersects(li->aabb)) { +					continue; +				} + +				lights[idx].type = storage->light_get_type(li->light); + +				Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); +				if (lights[idx].type == RS::LIGHT_DIRECTIONAL) { +					dir.y *= rb->sdfgi->y_mult; //only makes sense for directional +					dir.normalize(); +				} +				lights[idx].direction[0] = dir.x; +				lights[idx].direction[1] = dir.y; +				lights[idx].direction[2] = dir.z; +				Vector3 pos = li->transform.origin; +				pos.y *= rb->sdfgi->y_mult; +				lights[idx].position[0] = pos.x; +				lights[idx].position[1] = pos.y; +				lights[idx].position[2] = pos.z; +				Color color = storage->light_get_color(li->light); +				color = color.to_linear(); +				lights[idx].color[0] = color.r; +				lights[idx].color[1] = color.g; +				lights[idx].color[2] = color.b; +				lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); +				lights[idx].has_shadow = storage->light_has_shadow(li->light); +				lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); +				lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); +				lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); +				lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + +				idx++; +			} + +			if (idx > 0) { +				RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, true); +			} +			dl_push_constant.light_count = idx; +		} + +		dl_push_constant.cascade = p_cascade_indices[i]; + +		if (dl_push_constant.light_count > 0) { +			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cc.sdf_direct_light_uniform_set, 0); +			RD::get_singleton()->compute_list_set_push_constant(compute_list, &dl_push_constant, 
sizeof(SDGIShader::DirectLightPushConstant)); +			RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cc.solid_cell_dispatch_buffer, 0); +		} +	} + +	RD::get_singleton()->compute_list_end(); +} +  bool RasterizerSceneRD::free(RID p_rid) {  	if (render_buffers_owner.owns(p_rid)) {  		RenderBuffers *rb = render_buffers_owner.getornull(p_rid);  		_free_render_buffer_data(rb);  		memdelete(rb->data); +		if (rb->sdfgi) { +			_sdfgi_erase(rb); +		}  		render_buffers_owner.free(p_rid);  	} else if (environment_owner.owns(p_rid)) {  		//not much to delete, just free it @@ -3966,18 +6265,12 @@ bool RasterizerSceneRD::free(RID p_rid) {  			RD::get_singleton()->free(gi_probe->texture);  			RD::get_singleton()->free(gi_probe->write_buffer);  		} -		if (gi_probe->anisotropy[0].is_valid()) { -			RD::get_singleton()->free(gi_probe->anisotropy[0]); -			RD::get_singleton()->free(gi_probe->anisotropy[1]); -		}  		for (int i = 0; i < gi_probe->dynamic_maps.size(); i++) {  			RD::get_singleton()->free(gi_probe->dynamic_maps[i].texture);  			RD::get_singleton()->free(gi_probe->dynamic_maps[i].depth);  		} -		gi_probe_slots.write[gi_probe->slot] = RID(); -  		gi_probe_instance_owner.free(p_rid);  	} else if (sky_owner.owns(p_rid)) {  		_update_dirty_skys(); @@ -4050,17 +6343,22 @@ void RasterizerSceneRD::set_time(double p_time, double p_step) {  	time_step = p_step;  } -void RasterizerSceneRD::screen_space_roughness_limiter_set_active(bool p_enable, float p_curve) { +void RasterizerSceneRD::screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_limit) {  	screen_space_roughness_limiter = p_enable; -	screen_space_roughness_limiter_curve = p_curve; +	screen_space_roughness_limiter_amount = p_amount; +	screen_space_roughness_limiter_limit = p_limit;  }  bool RasterizerSceneRD::screen_space_roughness_limiter_is_active() const {  	return screen_space_roughness_limiter;  } -float RasterizerSceneRD::screen_space_roughness_limiter_get_curve() const { -	
return screen_space_roughness_limiter_curve; +float RasterizerSceneRD::screen_space_roughness_limiter_get_amount() const { +	return screen_space_roughness_limiter_amount; +} + +float RasterizerSceneRD::screen_space_roughness_limiter_get_limit() const { +	return screen_space_roughness_limiter_limit;  }  TypedArray<Image> RasterizerSceneRD::bake_render_uv2(RID p_base, const Vector<RID> &p_material_overrides, const Size2i &p_image_size) { @@ -4154,6 +6452,11 @@ TypedArray<Image> RasterizerSceneRD::bake_render_uv2(RID p_base, const Vector<RI  	return ret;  } +void RasterizerSceneRD::sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir) { +	sdfgi_debug_probe_pos = p_position; +	sdfgi_debug_probe_dir = p_dir; +} +  RasterizerSceneRD *RasterizerSceneRD::singleton = nullptr;  RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) { @@ -4165,7 +6468,7 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) {  	sky_use_cubemap_array = GLOBAL_GET("rendering/quality/reflections/texture_array_reflections");  	//	sky_use_cubemap_array = false; -	uint32_t textures_per_stage = RD::get_singleton()->limit_get(RD::LIMIT_MAX_TEXTURES_PER_SHADER_STAGE); +	//uint32_t textures_per_stage = RD::get_singleton()->limit_get(RD::LIMIT_MAX_TEXTURES_PER_SHADER_STAGE);  	{  		//kinda complicated to compute the amount of slots, we try to use as many as we can @@ -4174,34 +6477,9 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) {  		gi_probe_lights = memnew_arr(GIProbeLight, gi_probe_max_lights);  		gi_probe_lights_uniform = RD::get_singleton()->uniform_buffer_create(gi_probe_max_lights * sizeof(GIProbeLight)); - -		gi_probe_use_anisotropy = GLOBAL_GET("rendering/quality/gi_probes/anisotropic"); -		gi_probe_quality = GIProbeQuality(CLAMP(int(GLOBAL_GET("rendering/quality/gi_probes/quality")), 0, 2)); - -		if (textures_per_stage <= 16) { -			gi_probe_slots.resize(2); //thats all you can get -			gi_probe_use_anisotropy = 
false; -		} else if (textures_per_stage <= 31) { -			gi_probe_slots.resize(4); //thats all you can get, iOS -			gi_probe_use_anisotropy = false; -		} else if (textures_per_stage <= 128) { -			gi_probe_slots.resize(32); //old intel -			gi_probe_use_anisotropy = false; -		} else if (textures_per_stage <= 256) { -			gi_probe_slots.resize(64); //old intel too -			gi_probe_use_anisotropy = false; -		} else { -			if (gi_probe_use_anisotropy) { -				gi_probe_slots.resize(1024 / 3); //needs 3 textures -			} else { -				gi_probe_slots.resize(1024); //modern intel, nvidia, 8192 or greater -			} -		} +		gi_probe_quality = RS::GIProbeQuality(CLAMP(int(GLOBAL_GET("rendering/quality/gi_probes/quality")), 0, 1));  		String defines = "\n#define MAX_LIGHTS " + itos(gi_probe_max_lights) + "\n"; -		if (gi_probe_use_anisotropy) { -			defines += "\n#define MODE_ANISOTROPIC\n"; -		}  		Vector<String> versions;  		versions.push_back("\n#define MODE_COMPUTE_LIGHT\n"); @@ -4223,9 +6501,6 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) {  	{  		String defines; -		if (gi_probe_use_anisotropy) { -			defines += "\n#define USE_ANISOTROPY\n"; -		}  		Vector<String> versions;  		versions.push_back("\n#define MODE_DEBUG_COLOR\n");  		versions.push_back("\n#define MODE_DEBUG_LIGHT\n"); @@ -4373,11 +6648,131 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) {  		sky_scene_state.sampler_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sky_shader.default_shader_rd, SKY_SET_SAMPLERS);  	} +	{ +		Vector<String> preprocess_modes; +		preprocess_modes.push_back("\n#define MODE_SCROLL\n"); +		preprocess_modes.push_back("\n#define MODE_SCROLL_OCCLUSION\n"); +		preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD\n"); +		preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD_HALF\n"); +		preprocess_modes.push_back("\n#define MODE_JUMPFLOOD\n"); +		preprocess_modes.push_back("\n#define MODE_JUMPFLOOD_OPTIMIZED\n"); +		
preprocess_modes.push_back("\n#define MODE_UPSCALE_JUMP_FLOOD\n"); +		preprocess_modes.push_back("\n#define MODE_OCCLUSION\n"); +		preprocess_modes.push_back("\n#define MODE_STORE\n"); +		String defines = "\n#define OCCLUSION_SIZE " + itos(SDFGI::CASCADE_SIZE / SDFGI::PROBE_DIVISOR) + "\n"; +		sdfgi_shader.preprocess.initialize(preprocess_modes, defines); +		sdfgi_shader.preprocess_shader = sdfgi_shader.preprocess.version_create(); +		for (int i = 0; i < SDGIShader::PRE_PROCESS_MAX; i++) { +			sdfgi_shader.preprocess_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, i)); +		} +	} + +	{ +		//calculate tables +		String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + +		Vector<String> direct_light_modes; +		direct_light_modes.push_back("\n#define MODE_PROCESS_STATIC\n"); +		direct_light_modes.push_back("\n#define MODE_PROCESS_DYNAMIC\n"); +		sdfgi_shader.direct_light.initialize(direct_light_modes, defines); +		sdfgi_shader.direct_light_shader = sdfgi_shader.direct_light.version_create(); +		for (int i = 0; i < SDGIShader::DIRECT_LIGHT_MODE_MAX; i++) { +			sdfgi_shader.direct_light_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.direct_light.version_get_shader(sdfgi_shader.direct_light_shader, i)); +		} +	} + +	{ +		//calculate tables +		String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; +		defines += "\n#define SH_SIZE " + itos(SDFGI::SH_SIZE) + "\n"; + +		Vector<String> integrate_modes; +		integrate_modes.push_back("\n#define MODE_PROCESS\n"); +		integrate_modes.push_back("\n#define MODE_STORE\n"); +		integrate_modes.push_back("\n#define MODE_SCROLL\n"); +		integrate_modes.push_back("\n#define MODE_SCROLL_STORE\n"); +		sdfgi_shader.integrate.initialize(integrate_modes, defines); +		sdfgi_shader.integrate_shader = sdfgi_shader.integrate.version_create(); + +		for (int i = 0; i < 
SDGIShader::INTEGRATE_MODE_MAX; i++) { +			sdfgi_shader.integrate_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, i)); +		} + +		{ +			Vector<RD::Uniform> uniforms; + +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_TEXTURE; +				u.binding = 0; +				u.ids.push_back(storage->texture_rd_get_default(RasterizerStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE)); +				uniforms.push_back(u); +			} +			{ +				RD::Uniform u; +				u.type = RD::UNIFORM_TYPE_SAMPLER; +				u.binding = 1; +				u.ids.push_back(storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); +				uniforms.push_back(u); +			} + +			sdfgi_shader.integrate_default_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); +		} +	} +	{ +		//calculate tables +		String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; +		Vector<String> gi_modes; +		gi_modes.push_back(""); +		gi.shader.initialize(gi_modes, defines); +		gi.shader_version = gi.shader.version_create(); +		for (int i = 0; i < GI::MODE_MAX; i++) { +			gi.pipelines[i] = RD::get_singleton()->compute_pipeline_create(gi.shader.version_get_shader(gi.shader_version, i)); +		} + +		gi.sdfgi_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(GI::SDFGIData)); +	} +	{ +		String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; +		Vector<String> debug_modes; +		debug_modes.push_back(""); +		sdfgi_shader.debug.initialize(debug_modes, defines); +		sdfgi_shader.debug_shader = sdfgi_shader.debug.version_create(); +		sdfgi_shader.debug_shader_version = sdfgi_shader.debug.version_get_shader(sdfgi_shader.debug_shader, 0); +		sdfgi_shader.debug_pipeline = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.debug_shader_version); +	} +	{ +		String defines = 
"\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + +		Vector<String> versions; +		versions.push_back("\n#define MODE_PROBES\n"); +		versions.push_back("\n#define MODE_VISIBILITY\n"); + +		sdfgi_shader.debug_probes.initialize(versions, defines); +		sdfgi_shader.debug_probes_shader = sdfgi_shader.debug_probes.version_create(); + +		{ +			RD::PipelineRasterizationState rs; +			rs.cull_mode = RD::POLYGON_CULL_DISABLED; +			RD::PipelineDepthStencilState ds; +			ds.enable_depth_test = true; +			ds.enable_depth_write = true; +			ds.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; +			for (int i = 0; i < SDGIShader::PROBE_DEBUG_MAX; i++) { +				RID debug_probes_shader_version = sdfgi_shader.debug_probes.version_get_shader(sdfgi_shader.debug_probes_shader, i); +				sdfgi_shader.debug_probes_pipeline[i].setup(debug_probes_shader_version, RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); +			} +		} +	} + +	default_giprobe_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(GI::GIProbeData) * RenderBuffers::MAX_GIPROBES); +  	camera_effects_set_dof_blur_bokeh_shape(RS::DOFBokehShape(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_shape"))));  	camera_effects_set_dof_blur_quality(RS::DOFBlurQuality(int(GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_bokeh_quality"))), GLOBAL_GET("rendering/quality/depth_of_field/depth_of_field_use_jitter"));  	environment_set_ssao_quality(RS::EnvironmentSSAOQuality(int(GLOBAL_GET("rendering/quality/ssao/quality"))), GLOBAL_GET("rendering/quality/ssao/half_size")); -	screen_space_roughness_limiter = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter"); -	screen_space_roughness_limiter_curve = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_curve"); +	screen_space_roughness_limiter = 
GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_enabled"); +	screen_space_roughness_limiter_amount = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_amount"); +	screen_space_roughness_limiter_limit = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_limit");  	glow_bicubic_upscale = int(GLOBAL_GET("rendering/quality/glow/upscale_mode")) > 0;  	ssr_roughness_quality = RS::EnvironmentSSRRoughnessQuality(int(GLOBAL_GET("rendering/quality/screen_space_reflection/roughness_quality")));  	sss_quality = RS::SubSurfaceScatteringQuality(int(GLOBAL_GET("rendering/quality/subsurface_scattering/subsurface_scattering_quality"))); @@ -4406,9 +6801,19 @@ RasterizerSceneRD::~RasterizerSceneRD() {  		RD::get_singleton()->free(sky_scene_state.light_uniform_set);  	} +	RD::get_singleton()->free(default_giprobe_buffer);  	RD::get_singleton()->free(gi_probe_lights_uniform); +	RD::get_singleton()->free(gi.sdfgi_ubo); +  	giprobe_debug_shader.version_free(giprobe_debug_shader_version);  	giprobe_shader.version_free(giprobe_lighting_shader_version); +	gi.shader.version_free(gi.shader_version); +	sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader); +	sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader); +	sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader); +	sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader); +	sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); +  	memdelete_arr(gi_probe_lights);  	SkyMaterialData *md = (SkyMaterialData *)storage->material_get_data(sky_shader.default_material, RasterizerStorageRD::SHADER_TYPE_SKY);  	sky_shader.shader.version_free(md->shader_data->version); diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h index 781dbd50cc..88c2f5a5e6 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h +++ 
b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h @@ -31,22 +31,22 @@  #ifndef RASTERIZER_SCENE_RD_H  #define RASTERIZER_SCENE_RD_H +#include "core/local_vector.h"  #include "core/rid_owner.h"  #include "servers/rendering/rasterizer.h"  #include "servers/rendering/rasterizer_rd/rasterizer_storage_rd.h" +#include "servers/rendering/rasterizer_rd/shaders/gi.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/giprobe.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/giprobe_debug.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/sdfgi_debug.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/sdfgi_debug_probes.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl.gen.h"  #include "servers/rendering/rasterizer_rd/shaders/sky.glsl.gen.h"  #include "servers/rendering/rendering_device.h"  class RasterizerSceneRD : public RasterizerScene { -public: -	enum GIProbeQuality { -		GIPROBE_QUALITY_ULTRA_LOW, -		GIPROBE_QUALITY_MEDIUM, -		GIPROBE_QUALITY_HIGH, -	}; -  protected:  	double time; @@ -81,23 +81,27 @@ protected:  	virtual void _render_shadow(RID p_framebuffer, InstanceBase **p_cull_result, int p_cull_count, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake) = 0;  	virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) = 0;  	virtual void _render_uv2(InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region) = 0; +	virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB 
&p_bounds, InstanceBase **p_cull_result, int p_cull_count, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0;  	virtual void _debug_giprobe(RID p_gi_probe, RenderingDevice::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); +	void _debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform);  	RenderBufferData *render_buffers_get_data(RID p_render_buffers);  	virtual void _base_uniforms_changed() = 0;  	virtual void _render_buffers_uniform_set_changed(RID p_render_buffers) = 0; -	virtual RID _render_buffers_get_roughness_texture(RID p_render_buffers) = 0;  	virtual RID _render_buffers_get_normal_texture(RID p_render_buffers) = 0; +	virtual RID _render_buffers_get_ambient_texture(RID p_render_buffers) = 0; +	virtual RID _render_buffers_get_reflection_texture(RID p_render_buffers) = 0;  	void _process_ssao(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection); -	void _process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_roughness_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive); +	void _process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive);  	void _process_sss(RID p_render_buffers, const CameraMatrix &p_camera);  	void _setup_sky(RID p_environment, const Vector3 &p_position, const Size2i p_screen_size);  	void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform);  	void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID 
p_environment, const CameraMatrix &p_projection, const Transform &p_transform); +	void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_ambient_buffer, RID p_reflection_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count);  private:  	RS::ViewportDebugDraw debug_draw = RS::VIEWPORT_DEBUG_DRAW_DISABLED; @@ -267,6 +271,8 @@ private:  		SkyMaterialData *prev_material;  		Vector3 prev_position;  		float prev_time; + +		RID sdfgi_integrate_sky_uniform_set;  	};  	Sky *dirty_sky_list = nullptr; @@ -388,13 +394,10 @@ private:  	struct GIProbeInstance {  		RID probe;  		RID texture; -		RID anisotropy[2]; //only if anisotropy is used -		RID anisotropy_r16[2]; //only if anisotropy is used  		RID write_buffer;  		struct Mipmap {  			RID texture; -			RID anisotropy[2]; //only if anisotropy is used  			RID uniform_set;  			RID second_bounce_uniform_set;  			RID write_uniform_set; @@ -423,7 +426,7 @@ private:  		uint32_t last_probe_version = 0;  		uint32_t last_probe_data_version = 0; -		uint64_t last_pass = 0; +		//uint64_t last_pass = 0;  		uint32_t render_index = 0;  		bool has_dynamic_object_data = false; @@ -435,11 +438,6 @@ private:  	uint32_t gi_probe_max_lights;  	RID gi_probe_lights_uniform; -	bool gi_probe_use_anisotropy = false; -	GIProbeQuality gi_probe_quality = GIPROBE_QUALITY_MEDIUM; - -	Vector<RID> gi_probe_slots; -  	enum {  		GI_PROBE_SHADER_VERSION_COMPUTE_LIGHT,  		GI_PROBE_SHADER_VERSION_COMPUTE_SECOND_BOUNCE, @@ -458,6 +456,8 @@ private:  	mutable RID_Owner<GIProbeInstance> gi_probe_instance_owner; +	RS::GIProbeQuality gi_probe_quality = RS::GI_PROBE_QUALITY_HIGH; +  	enum {  		GI_PROBE_DEBUG_COLOR,  		GI_PROBE_DEBUG_LIGHT, @@ -591,6 +591,7 @@ private:  		ShadowTransform shadow_transform[4]; +		AABB aabb;  		RID self;  		RID light;  		Transform transform; @@ -680,6 +681,19 @@ private:  		float ssr_fade_in = 0.15;  		
float ssr_fade_out = 2.0;  		float ssr_depth_tolerance = 0.2; + +		/// SDFGI +		bool sdfgi_enabled = false; +		RS::EnvironmentSDFGICascades sdfgi_cascades; +		float sdfgi_min_cell_size = 0.2; +		bool sdfgi_use_occlusion = false; +		bool sdfgi_use_multibounce = false; +		bool sdfgi_read_sky_light = false; +		bool sdfgi_enhance_ssr = false; +		float sdfgi_energy = 1.0; +		float sdfgi_normal_bias = 1.1; +		float sdfgi_probe_bias = 1.1; +		RS::EnvironmentSDFGIYScale sdfgi_y_scale = RS::ENV_SDFGI_Y_SCALE_DISABLED;  	};  	RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; @@ -719,7 +733,13 @@ private:  	/* RENDER BUFFERS */ +	struct SDFGI; +  	struct RenderBuffers { +		enum { +			MAX_GIPROBES = 8 +		}; +  		RenderBufferData *data = nullptr;  		int width = 0, height = 0;  		RS::ViewportMSAA msaa = RS::VIEWPORT_MSAA_DISABLED; @@ -732,6 +752,9 @@ private:  		RID texture; //main texture for rendering to, must be filled after done rendering  		RID depth_texture; //main depth texture +		RID gi_uniform_set; +		SDFGI *sdfgi = nullptr; +  		//built-in textures used for ping pong image processing and blurring  		struct Blur {  			RID texture; @@ -764,10 +787,389 @@ private:  			RID depth_scaled;  			RID blur_radius[2];  		} ssr; + +		RID giprobe_textures[MAX_GIPROBES]; +		RID giprobe_buffer;  	}; +	RID default_giprobe_buffer; + +	/* SDFGI */ + +	struct SDFGI { +		enum { +			MAX_CASCADES = 8, +			CASCADE_SIZE = 128, +			PROBE_DIVISOR = 16, +			ANISOTROPY_SIZE = 6, +			MAX_DYNAMIC_LIGHTS = 128, +			MAX_STATIC_LIGHTS = 1024, +			LIGHTPROBE_OCT_SIZE = 6, +			SH_SIZE = 16 +		}; + +		struct Cascade { +			struct UBO { +				float offset[3]; +				float to_cell; +				int32_t probe_offset[3]; +				uint32_t pad; +			}; + +			//cascade blocks are full-size for volume (128^3), half size for albedo/emission +			RID sdf_tex; +			RID light_tex; +			RID light_aniso_0_tex; +			RID light_aniso_1_tex; + +			RID light_data; +			RID light_aniso_0_data; +			RID light_aniso_1_data; + +	
		struct SolidCell { // this struct is unused, but remains as reference for size +				uint32_t position; +				uint32_t albedo; +				uint32_t static_light; +				uint32_t static_light_aniso; +			}; + +			RID solid_cell_dispatch_buffer; //buffer for indirect compute dispatch +			RID solid_cell_buffer; + +			RID lightprobe_history_tex; +			RID lightprobe_average_tex; + +			float cell_size; +			Vector3i position; + +			static const Vector3i DIRTY_ALL; +			Vector3i dirty_regions; //(0,0,0 is not dirty, negative is refresh from the end, DIRTY_ALL is refresh all. + +			RID sdf_store_uniform_set; +			RID sdf_direct_light_uniform_set; +			RID scroll_uniform_set; +			RID scroll_occlusion_uniform_set; +			RID integrate_uniform_set; +			RID lights_buffer; +		}; + +		//used for rendering (voxelization) +		RID render_albedo; +		RID render_emission; +		RID render_emission_aniso; +		RID render_occlusion[8]; +		RID render_geom_facing; + +		RID render_sdf[2]; +		RID render_sdf_half[2]; + +		//used for ping pong processing in cascades +		RID sdf_initialize_uniform_set; +		RID sdf_initialize_half_uniform_set; +		RID jump_flood_uniform_set[2]; +		RID jump_flood_half_uniform_set[2]; +		RID sdf_upscale_uniform_set; +		int upscale_jfa_uniform_set_index; +		RID occlusion_uniform_set; + +		uint32_t cascade_size = 128; + +		LocalVector<Cascade> cascades; + +		RID lightprobe_texture; +		RID lightprobe_data; +		RID occlusion_texture; +		RID occlusion_data; + +		RID lightprobe_history_scroll; //used for scrolling lightprobes +		RID lightprobe_average_scroll; //used for scrolling lightprobes + +		uint32_t history_size = 0; +		float solid_cell_ratio = 0; +		uint32_t solid_cell_count = 0; + +		RS::EnvironmentSDFGICascades cascade_mode; +		float min_cell_size = 0; +		uint32_t probe_axis_count = 0; //amount of probes per axis, this is an odd number because it encloses endpoints + +		RID debug_uniform_set; +		RID debug_probes_uniform_set; +		RID cascades_ubo; + +		bool uses_occlusion = false; +		bool 
uses_multibounce = false; +		bool reads_sky = false; +		float energy = 1.0; +		float normal_bias = 1.1; +		float probe_bias = 1.1; +		RS::EnvironmentSDFGIYScale y_scale_mode = RS::ENV_SDFGI_Y_SCALE_DISABLED; + +		float y_mult = 1.0; + +		uint32_t render_pass = 0; +	}; + +	RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; +	RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_10_FRAMES; +	float sdfgi_solid_cell_ratio = 0.25; +	Vector3 sdfgi_debug_probe_pos; +	Vector3 sdfgi_debug_probe_dir; +	bool sdfgi_debug_probe_enabled = false; +	Vector3i sdfgi_debug_probe_index; + +	struct SDGIShader { +		enum SDFGIPreprocessShaderVersion { +			PRE_PROCESS_SCROLL, +			PRE_PROCESS_SCROLL_OCCLUSION, +			PRE_PROCESS_JUMP_FLOOD_INITIALIZE, +			PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF, +			PRE_PROCESS_JUMP_FLOOD, +			PRE_PROCESS_JUMP_FLOOD_OPTIMIZED, +			PRE_PROCESS_JUMP_FLOOD_UPSCALE, +			PRE_PROCESS_OCCLUSION, +			PRE_PROCESS_STORE, +			PRE_PROCESS_MAX +		}; + +		struct PreprocessPushConstant { +			int32_t scroll[3]; +			int32_t grid_size; + +			int32_t probe_offset[3]; +			int32_t step_size; + +			int32_t half_size; +			uint32_t occlusion_index; +			int32_t cascade; +			uint32_t pad; +		}; + +		SdfgiPreprocessShaderRD preprocess; +		RID preprocess_shader; +		RID preprocess_pipeline[PRE_PROCESS_MAX]; + +		struct DebugPushConstant { +			float grid_size[3]; +			uint32_t max_cascades; + +			int32_t screen_size[2]; +			uint32_t use_occlusion; +			float y_mult; + +			float cam_extent[3]; +			uint32_t probe_axis_size; + +			float cam_transform[16]; +		}; + +		SdfgiDebugShaderRD debug; +		RID debug_shader; +		RID debug_shader_version; +		RID debug_pipeline; + +		enum ProbeDebugMode { +			PROBE_DEBUG_PROBES, +			PROBE_DEBUG_VISIBILITY, +			PROBE_DEBUG_MAX +		}; + +		struct DebugProbesPushConstant { +			float projection[16]; + +			uint32_t band_power; +			uint32_t sections_in_band; +			uint32_t band_mask; +			float section_arc; + +	
		float grid_size[3]; +			uint32_t cascade; + +			uint32_t pad; +			float y_mult; +			int32_t probe_debug_index; +			int32_t probe_axis_size; +		}; + +		SdfgiDebugProbesShaderRD debug_probes; +		RID debug_probes_shader; +		RID debug_probes_shader_version; + +		RenderPipelineVertexFormatCacheRD debug_probes_pipeline[PROBE_DEBUG_MAX]; + +		struct Light { +			float color[3]; +			float energy; + +			float direction[3]; +			uint32_t has_shadow; + +			float position[3]; +			float attenuation; + +			uint32_t type; +			float spot_angle; +			float spot_attenuation; +			float radius; + +			float shadow_color[4]; +		}; + +		struct DirectLightPushConstant { +			float grid_size[3]; +			uint32_t max_cascades; + +			uint32_t cascade; +			uint32_t light_count; +			uint32_t process_offset; +			uint32_t process_increment; + +			int32_t probe_axis_size; +			uint32_t multibounce; +			float y_mult; +			uint32_t pad; +		}; + +		enum { +			DIRECT_LIGHT_MODE_STATIC, +			DIRECT_LIGHT_MODE_DYNAMIC, +			DIRECT_LIGHT_MODE_MAX +		}; +		SdfgiDirectLightShaderRD direct_light; +		RID direct_light_shader; +		RID direct_light_pipeline[DIRECT_LIGHT_MODE_MAX]; + +		enum { +			INTEGRATE_MODE_PROCESS, +			INTEGRATE_MODE_STORE, +			INTEGRATE_MODE_SCROLL, +			INTEGRATE_MODE_SCROLL_STORE, +			INTEGRATE_MODE_MAX +		}; +		struct IntegratePushConstant { +			enum { +				SKY_MODE_DISABLED, +				SKY_MODE_COLOR, +				SKY_MODE_SKY, +			}; + +			float grid_size[3]; +			uint32_t max_cascades; + +			uint32_t probe_axis_size; +			uint32_t cascade; +			uint32_t history_index; +			uint32_t history_size; + +			uint32_t ray_count; +			float ray_bias; +			int32_t image_size[2]; + +			int32_t world_offset[3]; +			uint32_t sky_mode; + +			int32_t scroll[3]; +			float sky_energy; + +			float sky_color[3]; +			float y_mult; +		}; + +		SdfgiIntegrateShaderRD integrate; +		RID integrate_shader; +		RID integrate_pipeline[INTEGRATE_MODE_MAX]; + +		RID integrate_default_sky_uniform_set; + +	} sdfgi_shader; + +	void 
_sdfgi_erase(RenderBuffers *rb); +	int _sdfgi_get_pending_region_data(RID p_render_buffers, int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const; +	void _sdfgi_update_cascades(RID p_render_buffers); + +	/* GI */ + +	struct GI { +		struct SDFGIData { +			float grid_size[3]; +			uint32_t max_cascades; + +			uint32_t use_occlusion; +			int32_t probe_axis_size; +			float probe_to_uvw; +			float normal_bias; + +			float lightprobe_tex_pixel_size[3]; +			float energy; + +			float lightprobe_uv_offset[3]; +			float y_mult; + +			float occlusion_clamp[3]; +			uint32_t pad3; + +			float occlusion_renormalize[3]; +			uint32_t pad4; + +			float cascade_probe_size[3]; +			uint32_t pad5; + +			struct ProbeCascadeData { +				float position[3]; //offset of (0,0,0) in world coordinates +				float to_probe; // 1/bounds * grid_size +				int32_t probe_world_offset[3]; +				float to_cell; // 1/bounds * grid_size +			}; + +			ProbeCascadeData cascades[SDFGI::MAX_CASCADES]; +		}; + +		struct GIProbeData { +			float xform[16]; +			float bounds[3]; +			float dynamic_range; + +			float bias; +			float normal_bias; +			uint32_t blend_ambient; +			uint32_t texture_slot; + +			float anisotropy_strength; +			float ao; +			float ao_size; +			uint32_t pad[1]; +		}; + +		struct PushConstant { +			int32_t screen_size[2]; +			float z_near; +			float z_far; + +			float proj_info[4]; + +			uint32_t max_giprobes; +			uint32_t high_quality_vct; +			uint32_t use_sdfgi; +			uint32_t orthogonal; + +			float ao_color[3]; +			uint32_t pad; + +			float cam_rotation[12]; +		}; + +		RID sdfgi_ubo; +		enum { +			MODE_MAX = 1 +		}; + +		GiShaderRD shader; +		RID shader_version; +		RID pipelines[MODE_MAX]; +	} gi; +  	bool screen_space_roughness_limiter = false; -	float screen_space_roughness_limiter_curve = 1.0; +	float screen_space_roughness_limiter_amount = 0.25; +	float screen_space_roughness_limiter_limit = 0.18;  	mutable RID_Owner<RenderBuffers> render_buffers_owner; @@ 
-777,10 +1179,16 @@ private:  	void _render_buffers_debug_draw(RID p_render_buffers, RID p_shadow_atlas);  	void _render_buffers_post_process_and_tonemap(RID p_render_buffers, RID p_environment, RID p_camera_effects, const CameraMatrix &p_projection); +	void _sdfgi_debug_draw(RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform);  	uint64_t scene_pass = 0;  	uint64_t shadow_atlas_realloc_tolerance_msec = 500; +	struct SDFGICosineNeighbour { +		uint32_t neighbour; +		float weight; +	}; +  public:  	/* SHADOW ATLAS API */ @@ -818,6 +1226,15 @@ public:  		return Size2i(directional_shadow.size, directional_shadow.size);  	} +	/* SDFGI UPDATE */ + +	int sdfgi_get_lightprobe_octahedron_size() const { return SDFGI::LIGHTPROBE_OCT_SIZE; } +	virtual void sdfgi_update(RID p_render_buffers, RID p_environment, const Vector3 &p_world_position); +	virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const; +	virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const; +	virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const; +	virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const RID *p_directional_light_instances, uint32_t p_directional_light_count, const RID *p_positional_light_instances, uint32_t p_positional_light_count); +	RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; }  	/* SKY API */  	RID sky_create(); @@ -871,6 +1288,11 @@ public:  	float environment_get_ssao_ao_affect(RID p_env) const;  	float environment_get_ssao_light_affect(RID p_env) const;  	bool environment_is_ssr_enabled(RID p_env) const; +	bool environment_is_sdfgi_enabled(RID p_env) const; + +	virtual void environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, bool p_enhance_ssr, float p_energy, float p_normal_bias, 
float p_probe_bias); +	virtual void environment_set_sdfgi_ray_count(RS::EnvironmentSDFGIRayCount p_ray_count); +	virtual void environment_set_sdfgi_frames_to_converge(RS::EnvironmentSDFGIFramesToConverge p_frames);  	void environment_set_ssr_roughness_quality(RS::EnvironmentSSRRoughnessQuality p_quality);  	RS::EnvironmentSSRRoughnessQuality environment_get_ssr_roughness_quality() const; @@ -894,6 +1316,7 @@ public:  	RID light_instance_create(RID p_light);  	void light_instance_set_transform(RID p_light_instance, const Transform &p_transform); +	void light_instance_set_aabb(RID p_light_instance, const AABB &p_aabb);  	void light_instance_set_shadow_transform(RID p_light_instance, const CameraMatrix &p_projection, const Transform &p_transform, float p_far, float p_split, int p_pass, float p_shadow_texel_size, float p_bias_scale = 1.0, float p_range_begin = 0, const Vector2 &p_uv_scale = Vector2());  	void light_instance_mark_visible(RID p_light_instance); @@ -1107,6 +1530,8 @@ public:  	bool gi_probe_needs_update(RID p_probe) const;  	void gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, int p_dynamic_object_count, InstanceBase **p_dynamic_objects); +	void gi_probe_set_quality(RS::GIProbeQuality p_quality) { gi_probe_quality = p_quality; } +  	_FORCE_INLINE_ uint32_t gi_probe_instance_get_slot(RID p_probe) {  		GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe);  		return gi_probe->slot; @@ -1124,10 +1549,6 @@ public:  		GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe);  		return gi_probe->texture;  	} -	_FORCE_INLINE_ RID gi_probe_instance_get_aniso_texture(RID p_probe, int p_index) { -		GIProbeInstance *gi_probe = gi_probe_instance_owner.getornull(p_probe); -		return gi_probe->anisotropy[p_index]; -	}  	_FORCE_INLINE_ void gi_probe_instance_set_render_index(RID p_instance, uint32_t p_render_index) {  		GIProbeInstance *gi_probe = 
gi_probe_instance_owner.getornull(p_instance); @@ -1141,7 +1562,7 @@ public:  		return gi_probe->render_index;  	} - +	/*  	_FORCE_INLINE_ void gi_probe_instance_set_render_pass(RID p_instance, uint32_t p_render_pass) {  		GIProbeInstance *g_probe = gi_probe_instance_owner.getornull(p_instance);  		ERR_FAIL_COND(!g_probe); @@ -1154,18 +1575,27 @@ public:  		return g_probe->last_pass;  	} - -	const Vector<RID> &gi_probe_get_slots() const; -	_FORCE_INLINE_ bool gi_probe_is_anisotropic() const { -		return gi_probe_use_anisotropy; -	} -	GIProbeQuality gi_probe_get_quality() const; - +*/  	RID render_buffers_create();  	void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_width, int p_height, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa);  	RID render_buffers_get_ao_texture(RID p_render_buffers);  	RID render_buffers_get_back_buffer_texture(RID p_render_buffers); +	RID render_buffers_get_gi_probe_buffer(RID p_render_buffers); +	RID render_buffers_get_default_gi_probe_buffer(); + +	uint32_t render_buffers_get_sdfgi_cascade_count(RID p_render_buffers) const; +	bool render_buffers_is_sdfgi_enabled(RID p_render_buffers) const; +	RID render_buffers_get_sdfgi_irradiance_probes(RID p_render_buffers) const; +	Vector3 render_buffers_get_sdfgi_cascade_offset(RID p_render_buffers, uint32_t p_cascade) const; +	Vector3i render_buffers_get_sdfgi_cascade_probe_offset(RID p_render_buffers, uint32_t p_cascade) const; +	float render_buffers_get_sdfgi_cascade_probe_size(RID p_render_buffers, uint32_t p_cascade) const; +	float render_buffers_get_sdfgi_normal_bias(RID p_render_buffers) const; +	uint32_t render_buffers_get_sdfgi_cascade_probe_count(RID p_render_buffers) const; +	uint32_t render_buffers_get_sdfgi_cascade_size(RID p_render_buffers) const; +	bool render_buffers_is_sdfgi_using_occlusion(RID p_render_buffers) const; +	float render_buffers_get_sdfgi_energy(RID p_render_buffers) const; +	RID 
render_buffers_get_sdfgi_occlusion_texture(RID p_render_buffers) const;  	void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID *p_light_cull_result, int p_light_cull_count, RID *p_reflection_probe_cull_result, int p_reflection_probe_cull_count, RID *p_gi_probe_cull_result, int p_gi_probe_cull_count, RID *p_decal_cull_result, int p_decal_cull_count, InstanceBase **p_lightmap_cull_result, int p_lightmap_cull_count, RID p_environment, RID p_shadow_atlas, RID p_camera_effects, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass); @@ -1173,6 +1603,9 @@ public:  	void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, InstanceBase **p_cull_result, int p_cull_count, RID p_framebuffer, const Rect2i &p_region); +	void render_sdfgi(RID p_render_buffers, int p_region, InstanceBase **p_cull_result, int p_cull_count); +	void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const RID **p_positional_light_cull_result, const uint32_t *p_positional_light_cull_count); +  	virtual void set_scene_pass(uint64_t p_pass) {  		scene_pass = p_pass;  	} @@ -1180,9 +1613,10 @@ public:  		return scene_pass;  	} -	virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_curve); +	virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_limit);  	virtual bool screen_space_roughness_limiter_is_active() const; -	virtual float screen_space_roughness_limiter_get_curve() const; +	virtual float screen_space_roughness_limiter_get_amount() const; +	virtual float screen_space_roughness_limiter_get_limit() const;  	virtual void sub_surface_scattering_set_quality(RS::SubSurfaceScatteringQuality p_quality);  	RS::SubSurfaceScatteringQuality sub_surface_scattering_get_quality() 
const; @@ -1221,6 +1655,8 @@ public:  	virtual void set_time(double p_time, double p_step); +	void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir); +  	RasterizerSceneRD(RasterizerStorageRD *p_storage);  	~RasterizerSceneRD();  }; diff --git a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp index a5151d1ff8..8f3e2c25f9 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp @@ -3387,11 +3387,21 @@ void RasterizerStorageRD::light_set_reverse_cull_face_mode(RID p_light, bool p_e  	light->instance_dependency.instance_notify_changed(true, false);  } -void RasterizerStorageRD::light_set_use_gi(RID p_light, bool p_enabled) { +void RasterizerStorageRD::light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) {  	Light *light = light_owner.getornull(p_light);  	ERR_FAIL_COND(!light); -	light->use_gi = p_enabled; +	light->bake_mode = p_bake_mode; + +	light->version++; +	light->instance_dependency.instance_notify_changed(true, false); +} + +void RasterizerStorageRD::light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) { +	Light *light = light_owner.getornull(p_light); +	ERR_FAIL_COND(!light); + +	light->max_sdfgi_cascade = p_cascade;  	light->version++;  	light->instance_dependency.instance_notify_changed(true, false); @@ -3460,11 +3470,18 @@ RS::LightDirectionalShadowDepthRangeMode RasterizerStorageRD::light_directional_  	return light->directional_range_mode;  } -bool RasterizerStorageRD::light_get_use_gi(RID p_light) { -	Light *light = light_owner.getornull(p_light); -	ERR_FAIL_COND_V(!light, false); +uint32_t RasterizerStorageRD::light_get_max_sdfgi_cascade(RID p_light) { +	const Light *light = light_owner.getornull(p_light); +	ERR_FAIL_COND_V(!light, 0); + +	return light->max_sdfgi_cascade; +} + +RS::LightBakeMode RasterizerStorageRD::light_get_bake_mode(RID p_light) { +	const 
Light *light = light_owner.getornull(p_light); +	ERR_FAIL_COND_V(!light, RS::LIGHT_BAKE_DISABLED); -	return light->use_gi; +	return light->bake_mode;  }  uint64_t RasterizerStorageRD::light_get_version(RID p_light) const { @@ -3517,25 +3534,25 @@ void RasterizerStorageRD::reflection_probe_set_intensity(RID p_probe, float p_in  	reflection_probe->intensity = p_intensity;  } -void RasterizerStorageRD::reflection_probe_set_interior_ambient(RID p_probe, const Color &p_ambient) { +void RasterizerStorageRD::reflection_probe_set_ambient_mode(RID p_probe, RS::ReflectionProbeAmbientMode p_mode) {  	ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe);  	ERR_FAIL_COND(!reflection_probe); -	reflection_probe->interior_ambient = p_ambient; +	reflection_probe->ambient_mode = p_mode;  } -void RasterizerStorageRD::reflection_probe_set_interior_ambient_energy(RID p_probe, float p_energy) { +void RasterizerStorageRD::reflection_probe_set_ambient_color(RID p_probe, const Color &p_color) {  	ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe);  	ERR_FAIL_COND(!reflection_probe); -	reflection_probe->interior_ambient_energy = p_energy; +	reflection_probe->ambient_color = p_color;  } -void RasterizerStorageRD::reflection_probe_set_interior_ambient_probe_contribution(RID p_probe, float p_contrib) { +void RasterizerStorageRD::reflection_probe_set_ambient_energy(RID p_probe, float p_energy) {  	ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe);  	ERR_FAIL_COND(!reflection_probe); -	reflection_probe->interior_ambient_probe_contrib = p_contrib; +	reflection_probe->ambient_color_energy = p_energy;  }  void RasterizerStorageRD::reflection_probe_set_max_distance(RID p_probe, float p_distance) { @@ -3683,25 +3700,23 @@ bool RasterizerStorageRD::reflection_probe_is_box_projection(RID p_probe) const  	return reflection_probe->box_projection;  } -Color RasterizerStorageRD::reflection_probe_get_interior_ambient(RID 
p_probe) const { +RS::ReflectionProbeAmbientMode RasterizerStorageRD::reflection_probe_get_ambient_mode(RID p_probe) const {  	const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); -	ERR_FAIL_COND_V(!reflection_probe, Color()); - -	return reflection_probe->interior_ambient; +	ERR_FAIL_COND_V(!reflection_probe, RS::REFLECTION_PROBE_AMBIENT_DISABLED); +	return reflection_probe->ambient_mode;  } -float RasterizerStorageRD::reflection_probe_get_interior_ambient_energy(RID p_probe) const { +Color RasterizerStorageRD::reflection_probe_get_ambient_color(RID p_probe) const {  	const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe); -	ERR_FAIL_COND_V(!reflection_probe, 0); +	ERR_FAIL_COND_V(!reflection_probe, Color()); -	return reflection_probe->interior_ambient_energy; +	return reflection_probe->ambient_color;  } - -float RasterizerStorageRD::reflection_probe_get_interior_ambient_probe_contribution(RID p_probe) const { +float RasterizerStorageRD::reflection_probe_get_ambient_color_energy(RID p_probe) const {  	const ReflectionProbe *reflection_probe = reflection_probe_owner.getornull(p_probe);  	ERR_FAIL_COND_V(!reflection_probe, 0); -	return reflection_probe->interior_ambient_probe_contrib; +	return reflection_probe->ambient_color_energy;  }  RID RasterizerStorageRD::decal_create() { @@ -5878,6 +5893,20 @@ RasterizerStorageRD::RasterizerStorageRD() {  		}  		default_rd_textures[DEFAULT_RD_TEXTURE_MULTIMESH_BUFFER] = RD::get_singleton()->texture_buffer_create(16, RD::DATA_FORMAT_R8G8B8A8_UNORM, pv); + +		for (int i = 0; i < 16; i++) { +			pv.set(i * 4 + 0, 0); +			pv.set(i * 4 + 1, 0); +			pv.set(i * 4 + 2, 0); +			pv.set(i * 4 + 3, 0); +		} + +		{ +			tformat.format = RD::DATA_FORMAT_R8G8B8A8_UINT; +			Vector<Vector<uint8_t>> vpv; +			vpv.push_back(pv); +			default_rd_textures[DEFAULT_RD_TEXTURE_2D_UINT] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vpv); +		}  	}  	{ //create default cubemap 
diff --git a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h index fe9377192e..b1146f1386 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h @@ -92,6 +92,7 @@ public:  		DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK,  		DEFAULT_RD_TEXTURE_3D_WHITE,  		DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE, +		DEFAULT_RD_TEXTURE_2D_UINT,  		DEFAULT_RD_TEXTURE_MAX  	}; @@ -420,7 +421,8 @@ private:  		bool shadow = false;  		bool negative = false;  		bool reverse_cull = false; -		bool use_gi = true; +		RS::LightBakeMode bake_mode = RS::LIGHT_BAKE_DYNAMIC; +		uint32_t max_sdfgi_cascade = 2;  		uint32_t cull_mask = 0xFFFFFFFF;  		RS::LightOmniShadowMode omni_shadow_mode = RS::LIGHT_OMNI_SHADOW_DUAL_PARABOLOID;  		RS::LightDirectionalShadowMode directional_shadow_mode = RS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL; @@ -439,9 +441,9 @@ private:  		RS::ReflectionProbeUpdateMode update_mode = RS::REFLECTION_PROBE_UPDATE_ONCE;  		int resolution = 256;  		float intensity = 1.0; -		Color interior_ambient; -		float interior_ambient_energy = 1.0; -		float interior_ambient_probe_contrib = 0.0; +		RS::ReflectionProbeAmbientMode ambient_mode = RS::REFLECTION_PROBE_AMBIENT_ENVIRONMENT; +		Color ambient_color; +		float ambient_color_energy = 1.0;  		float max_distance = 0;  		Vector3 extents = Vector3(1, 1, 1);  		Vector3 origin_offset; @@ -1041,7 +1043,8 @@ public:  	void light_set_negative(RID p_light, bool p_enable);  	void light_set_cull_mask(RID p_light, uint32_t p_mask);  	void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled); -	void light_set_use_gi(RID p_light, bool p_enabled); +	void light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode); +	void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade);  	void light_omni_set_shadow_mode(RID p_light, RS::LightOmniShadowMode p_mode); @@ -1118,7 +1121,8 @@ public:  		return 
light->param[RS::LIGHT_PARAM_TRANSMITTANCE_BIAS];  	} -	bool light_get_use_gi(RID p_light); +	RS::LightBakeMode light_get_bake_mode(RID p_light); +	uint32_t light_get_max_sdfgi_cascade(RID p_light);  	uint64_t light_get_version(RID p_light) const;  	/* PROBE API */ @@ -1127,9 +1131,9 @@ public:  	void reflection_probe_set_update_mode(RID p_probe, RS::ReflectionProbeUpdateMode p_mode);  	void reflection_probe_set_intensity(RID p_probe, float p_intensity); -	void reflection_probe_set_interior_ambient(RID p_probe, const Color &p_ambient); -	void reflection_probe_set_interior_ambient_energy(RID p_probe, float p_energy); -	void reflection_probe_set_interior_ambient_probe_contribution(RID p_probe, float p_contrib); +	void reflection_probe_set_ambient_mode(RID p_probe, RS::ReflectionProbeAmbientMode p_mode); +	void reflection_probe_set_ambient_color(RID p_probe, const Color &p_color); +	void reflection_probe_set_ambient_energy(RID p_probe, float p_energy);  	void reflection_probe_set_max_distance(RID p_probe, float p_distance);  	void reflection_probe_set_extents(RID p_probe, const Vector3 &p_extents);  	void reflection_probe_set_origin_offset(RID p_probe, const Vector3 &p_offset); @@ -1151,9 +1155,9 @@ public:  	float reflection_probe_get_intensity(RID p_probe) const;  	bool reflection_probe_is_interior(RID p_probe) const;  	bool reflection_probe_is_box_projection(RID p_probe) const; -	Color reflection_probe_get_interior_ambient(RID p_probe) const; -	float reflection_probe_get_interior_ambient_energy(RID p_probe) const; -	float reflection_probe_get_interior_ambient_probe_contribution(RID p_probe) const; +	RS::ReflectionProbeAmbientMode reflection_probe_get_ambient_mode(RID p_probe) const; +	Color reflection_probe_get_ambient_color(RID p_probe) const; +	float reflection_probe_get_ambient_color_energy(RID p_probe) const;  	void base_update_dependency(RID p_base, RasterizerScene::InstanceBase *p_instance);  	void skeleton_update_dependency(RID p_skeleton, 
RasterizerScene::InstanceBase *p_instance); diff --git a/servers/rendering/rasterizer_rd/shaders/SCsub b/servers/rendering/rasterizer_rd/shaders/SCsub index a454d144aa..67f4edc626 100644 --- a/servers/rendering/rasterizer_rd/shaders/SCsub +++ b/servers/rendering/rasterizer_rd/shaders/SCsub @@ -28,3 +28,10 @@ if "RD_GLSL" in env["BUILDERS"]:      env.RD_GLSL("screen_space_reflection_scale.glsl")      env.RD_GLSL("subsurface_scattering.glsl")      env.RD_GLSL("specular_merge.glsl") +    env.RD_GLSL("gi.glsl") +    env.RD_GLSL("resolve.glsl") +    env.RD_GLSL("sdfgi_preprocess.glsl") +    env.RD_GLSL("sdfgi_integrate.glsl") +    env.RD_GLSL("sdfgi_direct_light.glsl") +    env.RD_GLSL("sdfgi_debug.glsl") +    env.RD_GLSL("sdfgi_debug_probes.glsl") diff --git a/servers/rendering/rasterizer_rd/shaders/copy_to_fb.glsl b/servers/rendering/rasterizer_rd/shaders/copy_to_fb.glsl index b1cfe1e91e..9751e13b4e 100644 --- a/servers/rendering/rasterizer_rd/shaders/copy_to_fb.glsl +++ b/servers/rendering/rasterizer_rd/shaders/copy_to_fb.glsl @@ -47,16 +47,26 @@ layout(push_constant, binding = 1, std430) uniform Params {  	bool force_luminance;  	bool alpha_to_zero; -	uint pad[2]; +	bool srgb; +	uint pad;  }  params;  layout(location = 0) in vec2 uv_interp;  layout(set = 0, binding = 0) uniform sampler2D source_color; - +#ifdef MODE_TWO_SOURCES +layout(set = 1, binding = 0) uniform sampler2D source_color2; +#endif  layout(location = 0) out vec4 frag_color; +vec3 linear_to_srgb(vec3 color) { +	//if going to srgb, clamp from 0 to 1. 
+	color = clamp(color, vec3(0.0), vec3(1.0)); +	const vec3 a = vec3(0.055f); +	return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f))); +} +  void main() {  	vec2 uv = uv_interp; @@ -89,11 +99,17 @@ void main() {  	}  #endif  	vec4 color = textureLod(source_color, uv, 0.0); +#ifdef MODE_TWO_SOURCES +	color += textureLod(source_color2, uv, 0.0); +#endif  	if (params.force_luminance) {  		color.rgb = vec3(max(max(color.r, color.g), color.b));  	}  	if (params.alpha_to_zero) {  		color.rgb *= color.a;  	} +	if (params.srgb) { +		color.rgb = linear_to_srgb(color.rgb); +	}  	frag_color = color;  } diff --git a/servers/rendering/rasterizer_rd/shaders/gi.glsl b/servers/rendering/rasterizer_rd/shaders/gi.glsl new file mode 100644 index 0000000000..a1939f75ad --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/gi.glsl @@ -0,0 +1,663 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define M_PI 3.141592 + +#define SDFGI_MAX_CASCADES 8 + +//set 0 for SDFGI and render buffers + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 5) uniform texture3D occlusion_texture; + +layout(set = 0, binding = 6) uniform sampler linear_sampler; +layout(set = 0, binding = 7) uniform sampler linear_sampler_with_mipmaps; + +struct ProbeCascadeData { +	vec3 position; +	float to_probe; +	ivec3 probe_world_offset; +	float to_cell; // 1/bounds * grid_size +}; + +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image2D ambient_buffer; +layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D reflection_buffer; + 
+layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; + +layout(set = 0, binding = 12) uniform texture2D depth_buffer; +layout(set = 0, binding = 13) uniform texture2D normal_roughness_buffer; +layout(set = 0, binding = 14) uniform utexture2D giprobe_buffer; + +layout(set = 0, binding = 15, std140) uniform SDFGI { +	vec3 grid_size; +	uint max_cascades; + +	bool use_occlusion; +	int probe_axis_size; +	float probe_to_uvw; +	float normal_bias; + +	vec3 lightprobe_tex_pixel_size; +	float energy; + +	vec3 lightprobe_uv_offset; +	float y_mult; + +	vec3 occlusion_clamp; +	uint pad3; + +	vec3 occlusion_renormalize; +	uint pad4; + +	vec3 cascade_probe_size; +	uint pad5; + +	ProbeCascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; + +#define MAX_GI_PROBES 8 + +struct GIProbeData { +	mat4 xform; +	vec3 bounds; +	float dynamic_range; + +	float bias; +	float normal_bias; +	bool blend_ambient; +	uint texture_slot; + +	float anisotropy_strength; +	float ambient_occlusion; +	float ambient_occlusion_size; +	uint pad2; +}; + +layout(set = 0, binding = 16, std140) uniform GIProbes { +	GIProbeData data[MAX_GI_PROBES]; +} +gi_probes; + +layout(set = 0, binding = 17) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; + +layout(push_constant, binding = 0, std430) uniform Params { +	ivec2 screen_size; +	float z_near; +	float z_far; + +	vec4 proj_info; + +	uint max_giprobes; +	bool high_quality_vct; +	bool use_sdfgi; +	bool orthogonal; + +	vec3 ao_color; +	uint pad; + +	mat3x4 cam_rotation; +} +params; + +vec2 octahedron_wrap(vec2 v) { +	vec2 signVal; +	signVal.x = v.x >= 0.0 ? 1.0 : -1.0; +	signVal.y = v.y >= 0.0 ? 1.0 : -1.0; +	return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { +	// https://twitter.com/Stubbesaurus/status/937994790553227264 +	n /= (abs(n.x) + abs(n.y) + abs(n.z)); +	n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); +	n.xy = n.xy * 0.5 + 0.5; +	return n.xy; +} + +vec4 blend_color(vec4 src, vec4 dst) { +	vec4 res; +	float sa = 1.0 - src.a; +	res.a = dst.a * sa + src.a; +	if (res.a == 0.0) { +		res.rgb = vec3(0); +	} else { +		res.rgb = (dst.rgb * dst.a * sa + src.rgb * src.a) / res.a; +	} +	return res; +} + +vec3 reconstruct_position(ivec2 screen_pos) { +	vec3 pos; +	pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r; + +	pos.z = pos.z * 2.0 - 1.0; +	if (params.orthogonal) { +		pos.z = ((pos.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; +	} else { +		pos.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - pos.z * (params.z_far - params.z_near)); +	} +	pos.z = -pos.z; + +	pos.xy = vec2(screen_pos) * params.proj_info.xy + params.proj_info.zw; +	if (!params.orthogonal) { +		pos.xy *= pos.z; +	} + +	return pos; +} + +void sdfgi_probe_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal, vec3 cam_specular_normal, float roughness, out vec3 diffuse_light, out vec3 specular_light) { +	cascade_pos += cam_normal * sdfgi.normal_bias; + +	vec3 base_pos = floor(cascade_pos); +	//cascade_pos += mix(vec3(0.0),vec3(0.01),lessThan(abs(cascade_pos-base_pos),vec3(0.01))) * cam_normal; +	ivec3 probe_base_pos = ivec3(base_pos); + +	vec4 diffuse_accum = vec4(0.0); +	vec3 specular_accum; + +	ivec3 tex_pos = ivec3(probe_base_pos.xy, int(cascade)); +	tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; +	tex_pos.xy = tex_pos.xy * (SDFGI_OCT_SIZE + 2) + ivec2(1); + +	vec3 diffuse_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + +	vec3 specular_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_specular_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + +	specular_accum = vec3(0.0); + +	vec4 light_accum = vec4(0.0); +	float weight_accum = 0.0; + 
+	for (uint j = 0; j < 8; j++) { +		ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); +		ivec3 probe_posi = probe_base_pos; +		probe_posi += offset; + +		// Compute weight + +		vec3 probe_pos = vec3(probe_posi); +		vec3 probe_to_pos = cascade_pos - probe_pos; +		vec3 probe_dir = normalize(-probe_to_pos); + +		vec3 trilinear = vec3(1.0) - abs(probe_to_pos); +		float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(cam_normal, probe_dir)); + +		// Compute lightprobe occlusion + +		if (sdfgi.use_occlusion) { +			ivec3 occ_indexv = abs((sdfgi.cascades[cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); +			vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + +			vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; +			occ_pos.z += float(cascade); +			if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures +				occ_pos.x += 1.0; +			} + +			occ_pos *= sdfgi.occlusion_renormalize; +			float occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + +			weight *= max(occlusion, 0.01); +		} + +		// Compute lightprobe texture position + +		vec3 diffuse; +		vec3 pos_uvw = diffuse_posf; +		pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; +		pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; +		diffuse = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + +		diffuse_accum += vec4(diffuse * weight, weight); + +		{ +			vec3 specular = vec3(0.0); +			vec3 pos_uvw = specular_posf; +			pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; +			pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; +			if (roughness < 0.99) { +				specular = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw + vec3(0, 0, 
float(sdfgi.max_cascades)), 0.0).rgb; +			} +			if (roughness > 0.2) { +				specular = mix(specular, textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb, (roughness - 0.2) * 1.25); +			} + +			specular_accum += specular * weight; +		} +	} + +	if (diffuse_accum.a > 0.0) { +		diffuse_accum.rgb /= diffuse_accum.a; +	} + +	diffuse_light = diffuse_accum.rgb; + +	if (diffuse_accum.a > 0.0) { +		specular_accum /= diffuse_accum.a; +	} + +	specular_light = specular_accum; +} + +void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, out vec4 ambient_light, out vec4 reflection_light) { +	//make vertex orientation the world one, but still align to camera +	vertex.y *= sdfgi.y_mult; +	normal.y *= sdfgi.y_mult; +	reflection.y *= sdfgi.y_mult; + +	//renormalize +	normal = normalize(normal); +	reflection = normalize(reflection); + +	vec3 cam_pos = vertex; +	vec3 cam_normal = normal; + +	vec4 light_accum = vec4(0.0); +	float weight_accum = 0.0; + +	vec4 light_blend_accum = vec4(0.0); +	float weight_blend_accum = 0.0; + +	float blend = -1.0; + +	// helper constants, compute once + +	uint cascade = 0xFFFFFFFF; +	vec3 cascade_pos; +	vec3 cascade_normal; + +	for (uint i = 0; i < sdfgi.max_cascades; i++) { +		cascade_pos = (cam_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + +		if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { +			continue; //skip cascade +		} + +		cascade = i; +		break; +	} + +	if (cascade < SDFGI_MAX_CASCADES) { +		ambient_light = vec4(0, 0, 0, 1); +		reflection_light = vec4(0, 0, 0, 1); + +		float blend; +		vec3 diffuse, specular; +		sdfgi_probe_process(cascade, cascade_pos, cam_pos, cam_normal, reflection, roughness, diffuse, specular); + +		{ +			//process blend +			float blend_from = (float(sdfgi.probe_axis_size - 1) / 2.0) - 2.5; +			float blend_to = blend_from + 2.0; + +			vec3 inner_pos = cam_pos * sdfgi.cascades[cascade].to_probe; + 
+			float len = length(inner_pos); + +			inner_pos = abs(normalize(inner_pos)); +			len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z)); + +			if (len >= blend_from) { +				blend = smoothstep(blend_from, blend_to, len); +			} else { +				blend = 0.0; +			} +		} + +		if (blend > 0.0) { +			//blend +			if (cascade == sdfgi.max_cascades - 1) { +				ambient_light.a = 1.0 - blend; +				reflection_light.a = 1.0 - blend; + +			} else { +				vec3 diffuse2, specular2; +				cascade_pos = (cam_pos - sdfgi.cascades[cascade + 1].position) * sdfgi.cascades[cascade + 1].to_probe; +				sdfgi_probe_process(cascade + 1, cascade_pos, cam_pos, cam_normal, reflection, roughness, diffuse2, specular2); +				diffuse = mix(diffuse, diffuse2, blend); +				specular = mix(specular, specular2, blend); +			} +		} + +		ambient_light.rgb = diffuse; +#if 1 +		if (roughness < 0.2) { +			vec3 pos_to_uvw = 1.0 / sdfgi.grid_size; +			vec4 light_accum = vec4(0.0); + +			float blend_size = (sdfgi.grid_size.x / float(sdfgi.probe_axis_size - 1)) * 0.5; + +			float radius_sizes[SDFGI_MAX_CASCADES]; +			cascade = 0xFFFF; + +			float base_distance = length(cam_pos); +			for (uint i = 0; i < sdfgi.max_cascades; i++) { +				radius_sizes[i] = (1.0 / sdfgi.cascades[i].to_cell) * (sdfgi.grid_size.x * 0.5 - blend_size); +				if (cascade == 0xFFFF && base_distance < radius_sizes[i]) { +					cascade = i; +				} +			} + +			cascade = min(cascade, sdfgi.max_cascades - 1); + +			float max_distance = radius_sizes[sdfgi.max_cascades - 1]; +			vec3 ray_pos = cam_pos; +			vec3 ray_dir = reflection; + +			{ +				float prev_radius = cascade > 0 ? 
radius_sizes[cascade - 1] : 0.0; +				float base_blend = (base_distance - prev_radius) / (radius_sizes[cascade] - prev_radius); +				float bias = (1.0 + base_blend) * 1.1; +				vec3 abs_ray_dir = abs(ray_dir); +				//ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion +				ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; +			} + +			float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade +			while (length(ray_pos) < max_distance) { +				for (uint i = 0; i < sdfgi.max_cascades; i++) { +					if (i >= cascade && length(ray_pos) < radius_sizes[i]) { +						cascade = max(i, cascade); //never go down + +						vec3 pos = ray_pos - sdfgi.cascades[i].position; +						pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; + +						float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), pos).r * 255.0 - 1.1; + +						vec4 hit_light = vec4(0.0); +						if (distance < softness) { +							hit_light.rgb = texture(sampler3D(light_cascades[i], linear_sampler), pos).rgb; +							hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy +							hit_light.a = clamp(1.0 - (distance / softness), 0.0, 1.0); +							hit_light.rgb *= hit_light.a; +						} + +						distance /= sdfgi.cascades[i].to_cell; + +						if (i < (sdfgi.max_cascades - 1)) { +							pos = ray_pos - sdfgi.cascades[i + 1].position; +							pos *= sdfgi.cascades[i + 1].to_cell * pos_to_uvw; + +							float distance2 = texture(sampler3D(sdf_cascades[i + 1], linear_sampler), pos).r * 255.0 - 1.1; + +							vec4 hit_light2 = vec4(0.0); +							if (distance2 < softness) { +								hit_light2.rgb = texture(sampler3D(light_cascades[i + 1], linear_sampler), pos).rgb; +								hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy +								hit_light2.a = 
clamp(1.0 - (distance2 / softness), 0.0, 1.0); +								hit_light2.rgb *= hit_light2.a; +							} + +							float prev_radius = i == 0 ? 0.0 : radius_sizes[i - 1]; +							float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); + +							distance2 /= sdfgi.cascades[i + 1].to_cell; + +							hit_light = mix(hit_light, hit_light2, blend); +							distance = mix(distance, distance2, blend); +						} + +						light_accum += hit_light; +						ray_pos += ray_dir * distance; +						break; +					} +				} + +				if (light_accum.a > 0.99) { +					break; +				} +			} + +			vec3 light = light_accum.rgb / max(light_accum.a, 0.00001); +			float alpha = min(1.0, light_accum.a); + +			float b = min(1.0, roughness * 5.0); + +			float sa = 1.0 - b; + +			reflection_light.a = alpha * sa + b; +			if (reflection_light.a == 0) { +				specular = vec3(0.0); +			} else { +				specular = (light * alpha * sa + specular * b) / reflection_light.a; +			} +		} + +#endif + +		reflection_light.rgb = specular; + +		ambient_light.rgb *= sdfgi.energy; +		reflection_light.rgb *= sdfgi.energy; +	} else { +		ambient_light = vec4(0); +		reflection_light = vec4(0); +	} +} + +//standard voxel cone trace +vec4 voxel_cone_trace(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { +	float dist = p_bias; +	vec4 color = vec4(0.0); + +	while (dist < max_distance && color.a < 0.95) { +		float diameter = max(1.0, 2.0 * tan_half_angle * dist); +		vec3 uvw_pos = (pos + dist * direction) * cell_size; +		float half_diameter = diameter * 0.5; +		//check if outside, then break +		if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + half_diameter * cell_size)))) { +			break; +		} +		vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, log2(diameter)); +		float a = (1.0 - color.a); +		color += a * scolor; +		dist += half_diameter; +	} + +	return color; +} + +vec4 
voxel_cone_trace_45_degrees(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float max_distance, float p_bias) { +	float dist = p_bias; +	vec4 color = vec4(0.0); +	float radius = max(0.5, dist); +	float lod_level = log2(radius * 2.0); + +	while (dist < max_distance && color.a < 0.95) { +		vec3 uvw_pos = (pos + dist * direction) * cell_size; + +		//check if outside, then break +		if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + radius * cell_size)))) { +			break; +		} +		vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, lod_level); +		lod_level += 1.0; + +		float a = (1.0 - color.a); +		scolor *= a; +		color += scolor; +		dist += radius; +		radius = max(0.5, dist); +	} +	return color; +} + +void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3 normal_xform, float roughness, inout vec4 out_spec, inout vec4 out_diff, inout float out_blend) { +	position = (gi_probes.data[index].xform * vec4(position, 1.0)).xyz; +	ref_vec = normalize((gi_probes.data[index].xform * vec4(ref_vec, 0.0)).xyz); +	normal = normalize((gi_probes.data[index].xform * vec4(normal, 0.0)).xyz); + +	position += normal * gi_probes.data[index].normal_bias; + +	//this causes corrupted pixels, i have no idea why.. 
+	if (any(bvec2(any(lessThan(position, vec3(0.0))), any(greaterThan(position, gi_probes.data[index].bounds))))) { +		return; +	} + +	mat3 dir_xform = mat3(gi_probes.data[index].xform) * normal_xform; + +	vec3 blendv = abs(position / gi_probes.data[index].bounds * 2.0 - 1.0); +	float blend = clamp(1.0 - max(blendv.x, max(blendv.y, blendv.z)), 0.0, 1.0); +	//float blend=1.0; + +	float max_distance = length(gi_probes.data[index].bounds); +	vec3 cell_size = 1.0 / gi_probes.data[index].bounds; + +	//irradiance + +	vec4 light = vec4(0.0); + +	if (params.high_quality_vct) { +		const uint cone_dir_count = 6; +		vec3 cone_dirs[cone_dir_count] = vec3[]( +				vec3(0.0, 0.0, 1.0), +				vec3(0.866025, 0.0, 0.5), +				vec3(0.267617, 0.823639, 0.5), +				vec3(-0.700629, 0.509037, 0.5), +				vec3(-0.700629, -0.509037, 0.5), +				vec3(0.267617, -0.823639, 0.5)); + +		float cone_weights[cone_dir_count] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); +		float cone_angle_tan = 0.577; + +		for (uint i = 0; i < cone_dir_count; i++) { +			vec3 dir = normalize(dir_xform * cone_dirs[i]); +			light += cone_weights[i] * voxel_cone_trace(gi_probe_textures[index], cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); +		} +	} else { +		const uint cone_dir_count = 4; +		vec3 cone_dirs[cone_dir_count] = vec3[]( +				vec3(0.707107, 0.0, 0.707107), +				vec3(0.0, 0.707107, 0.707107), +				vec3(-0.707107, 0.0, 0.707107), +				vec3(0.0, -0.707107, 0.707107)); + +		float cone_weights[cone_dir_count] = float[](0.25, 0.25, 0.25, 0.25); +		for (int i = 0; i < cone_dir_count; i++) { +			vec3 dir = normalize(dir_xform * cone_dirs[i]); +			light += cone_weights[i] * voxel_cone_trace_45_degrees(gi_probe_textures[index], cell_size, position, dir, max_distance, gi_probes.data[index].bias); +		} +	} + +	if (gi_probes.data[index].ambient_occlusion > 0.001) { +		float size = 1.0 + gi_probes.data[index].ambient_occlusion_size * 7.0; + +		float taps, blend; +		blend = modf(size, 
taps); +		float ao = 0.0; +		for (float i = 1.0; i <= taps; i++) { +			vec3 ofs = (position + normal * (i * 0.5 + 1.0)) * cell_size; +			ao += textureLod(sampler3D(gi_probe_textures[index], linear_sampler_with_mipmaps), ofs, i - 1.0).a * i; +		} + +		if (blend > 0.001) { +			vec3 ofs = (position + normal * ((taps + 1.0) * 0.5 + 1.0)) * cell_size; +			ao += textureLod(sampler3D(gi_probe_textures[index], linear_sampler_with_mipmaps), ofs, taps).a * (taps + 1.0) * blend; +		} + +		ao = 1.0 - min(1.0, ao); + +		light.rgb = mix(params.ao_color, light.rgb, mix(1.0, ao, gi_probes.data[index].ambient_occlusion)); +	} + +	light.rgb *= gi_probes.data[index].dynamic_range; +	if (!gi_probes.data[index].blend_ambient) { +		light.a = 1.0; +	} + +	out_diff += light * blend; + +	//radiance +	vec4 irr_light = voxel_cone_trace(gi_probe_textures[index], cell_size, position, ref_vec, tan(roughness * 0.5 * M_PI * 0.99), max_distance, gi_probes.data[index].bias); +	irr_light.rgb *= gi_probes.data[index].dynamic_range; +	if (!gi_probes.data[index].blend_ambient) { +		irr_light.a = 1.0; +	} + +	out_spec += irr_light * blend; + +	out_blend += blend; +} + +vec4 fetch_normal_and_roughness(ivec2 pos) { +	vec4 normal_roughness = texelFetch(sampler2D(normal_roughness_buffer, linear_sampler), pos, 0); + +	normal_roughness.xyz = normalize(normal_roughness.xyz * 2.0 - 1.0); +	return normal_roughness; +} + +void main() { +	// Pixel being shaded +	ivec2 pos = ivec2(gl_GlobalInvocationID.xy); +	if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing +		return; +	} + +	vec3 vertex = reconstruct_position(pos); +	vertex.y = -vertex.y; + +	vec4 normal_roughness = fetch_normal_and_roughness(pos); +	vec3 normal = normal_roughness.xyz; + +	vec4 ambient_light = vec4(0.0), reflection_light = vec4(0.0); + +	if (length(normal) > 0.5) { +		//valid normal, can do GI +		float roughness = normal_roughness.w; + +		vertex = mat3(params.cam_rotation) * vertex; +		normal = 
normalize(mat3(params.cam_rotation) * normal); + +		vec3 reflection = normalize(reflect(normalize(vertex), normal)); + +		if (params.use_sdfgi) { +			sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); +		} + +		if (params.max_giprobes > 0) { +			uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg; +			roughness *= roughness; +			//find arbitrary tangent and bitangent, then build a matrix +			vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); +			vec3 tangent = normalize(cross(v0, normal)); +			vec3 bitangent = normalize(cross(tangent, normal)); +			mat3 normal_mat = mat3(tangent, bitangent, normal); + +			vec4 amb_accum = vec4(0.0); +			vec4 spec_accum = vec4(0.0); +			float blend_accum = 0.0; + +			for (uint i = 0; i < params.max_giprobes; i++) { +				if (any(equal(uvec2(i), giprobe_tex))) { +					gi_probe_compute(i, vertex, normal, reflection, normal_mat, roughness, spec_accum, amb_accum, blend_accum); +				} +			} +			if (blend_accum > 0.0) { +				amb_accum /= blend_accum; +				spec_accum /= blend_accum; +			} + +			if (params.use_sdfgi) { +				reflection_light = blend_color(spec_accum, reflection_light); +				ambient_light = blend_color(amb_accum, ambient_light); +			} else { +				reflection_light = spec_accum; +				ambient_light = amb_accum; +			} +		} +	} + +	imageStore(ambient_buffer, pos, ambient_light); +	imageStore(reflection_buffer, pos, reflection_light); +} diff --git a/servers/rendering/rasterizer_rd/shaders/resolve.glsl b/servers/rendering/rasterizer_rd/shaders/resolve.glsl new file mode 100644 index 0000000000..9429a66dc9 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/resolve.glsl @@ -0,0 +1,110 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#ifdef MODE_RESOLVE_GI +layout(set = 0, binding = 0) uniform sampler2DMS source_depth; +layout(set = 0, binding = 1) uniform 
sampler2DMS source_normal_roughness; + +layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_depth; +layout(rgba8, set = 1, binding = 1) uniform restrict writeonly image2D dest_normal_roughness; + +#ifdef GIPROBE_RESOLVE +layout(set = 2, binding = 0) uniform usampler2DMS source_giprobe; +layout(rg8ui, set = 3, binding = 0) uniform restrict writeonly uimage2D dest_giprobe; +#endif + +#endif + +layout(push_constant, binding = 16, std430) uniform Params { +	ivec2 screen_size; +	int sample_count; +	uint pad; +} +params; + +void main() { +	// Pixel being shaded +	ivec2 pos = ivec2(gl_GlobalInvocationID.xy); +	if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing +		return; +	} + +#ifdef MODE_RESOLVE_GI + +	float best_depth = 1e20; +	vec4 best_normal_roughness = vec4(0.0); +#ifdef GIPROBE_RESOLVE +	uvec2 best_giprobe; +#endif + +#if 0 + +	for(int i=0;i<params.sample_count;i++) { +		float depth = texelFetch(source_depth,pos,i).r; +		if (depth < best_depth) { //use the depth closest to camera +			best_depth = depth; +			best_normal_roughness = texelFetch(source_normal_roughness,pos,i); + +#ifdef GIPROBE_RESOLVE +			best_giprobe = texelFetch(source_giprobe,pos,i).rg; +#endif +		} +	} + +#else + +	float depths[16]; +	int depth_indices[16]; +	int depth_amount[16]; +	int depth_count = 0; + +	for (int i = 0; i < params.sample_count; i++) { +		float depth = texelFetch(source_depth, pos, i).r; +		int depth_index = -1; +		for (int j = 0; j < depth_count; j++) { +			if (abs(depths[j] - depth) < 0.000001) { +				depth_index = j; +				break; +			} +		} + +		if (depth_index == -1) { +			depths[depth_count] = depth; +			depth_indices[depth_count] = i; +			depth_amount[depth_count] = 1; +			depth_count += 1; +		} else { +			depth_amount[depth_index] += 1; +		} +	} + +	int depth_least = 0xFFFF; +	int best_index = 0; +	for (int j = 0; j < depth_count; j++) { +		if (depth_amount[j] < depth_least) { +			best_index = depth_indices[j]; +			
depth_least = depth_amount[j]; +		} +	} + +	best_depth = texelFetch(source_depth, pos, best_index).r; +	best_normal_roughness = texelFetch(source_normal_roughness, pos, best_index); +#ifdef GIPROBE_RESOLVE +	best_giprobe = texelFetch(source_giprobe, pos, best_index).rg; +#endif + +#endif + +	imageStore(dest_depth, pos, vec4(best_depth)); +	imageStore(dest_normal_roughness, pos, vec4(best_normal_roughness)); +#ifdef GIPROBE_RESOLVE +	imageStore(dest_giprobe, pos, uvec4(best_giprobe, 0, 0)); +#endif + +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl index 9f42b0f814..d6a56b2543 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl @@ -258,7 +258,6 @@ VERTEX_SHADER_CODE  		}  	}  #endif -  #ifdef MODE_RENDER_MATERIAL  	if (scene_data.material_uv2_mode) {  		gl_Position.xy = (uv2_attrib.xy + draw_call.bake_uv2_offset) * 2.0 - 1.0; @@ -341,11 +340,13 @@ layout(location = 4) out float depth_output_buffer;  #endif -#ifdef MODE_RENDER_NORMAL -layout(location = 0) out vec4 normal_output_buffer; -#ifdef MODE_RENDER_ROUGHNESS -layout(location = 1) out float roughness_output_buffer; -#endif //MODE_RENDER_ROUGHNESS +#ifdef MODE_RENDER_NORMAL_ROUGHNESS +layout(location = 0) out vec4 normal_roughness_output_buffer; + +#ifdef MODE_RENDER_GIPROBE +layout(location = 1) out uvec2 giprobe_buffer; +#endif +  #endif //MODE_RENDER_NORMAL  #else // RENDER DEPTH @@ -1321,37 +1322,39 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes  		reflection_accum += reflection;  	} -#if !defined(USE_LIGHTMAP) && !defined(USE_VOXEL_CONE_TRACING) -	if (reflections.data[ref_index].ambient.a > 0.0) { //compute ambient using skybox +	switch (reflections.data[ref_index].ambient_mode) { +		case REFLECTION_AMBIENT_DISABLED: { +			//do nothing +		} break; +		case REFLECTION_AMBIENT_ENVIRONMENT: { +	
		//sample ambient light from the probe's reflection atlas +			vec3 local_amb_vec = (reflections.data[ref_index].local_matrix * vec4(normal, 0.0)).xyz; -		vec3 local_amb_vec = (reflections.data[ref_index].local_matrix * vec4(normal, 0.0)).xyz; +			vec4 ambient_out; -		vec4 ambient_out; - -		ambient_out.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb; - -		ambient_out.a = blend; -		ambient_out.rgb = mix(reflections.data[ref_index].ambient.rgb, ambient_out.rgb, reflections.data[ref_index].ambient.a); -		if (reflections.data[ref_index].params.z < 0.5) { -			ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); -		} +			ambient_out.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb; +			ambient_out.a = blend; +			if (reflections.data[ref_index].params.z < 0.5) { //interior +				ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); +			} -		ambient_out.rgb *= ambient_out.a; -		ambient_accum += ambient_out; -	} else { -		vec4 ambient_out; -		ambient_out.a = blend; -		ambient_out.rgb = reflections.data[ref_index].ambient.rgb; -		if (reflections.data[ref_index].params.z < 0.5) { -			ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); -		} -		ambient_out.rgb *= ambient_out.a; -		ambient_accum += ambient_out; +			ambient_out.rgb *= ambient_out.a; +			ambient_accum += ambient_out; +		} break; +		case REFLECTION_AMBIENT_COLOR: { +			vec4 ambient_out; +			ambient_out.a = blend; +			ambient_out.rgb = reflections.data[ref_index].ambient; +			if (reflections.data[ref_index].params.z < 0.5) { +				ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); +			} +			ambient_out.rgb *= ambient_out.a; +			ambient_accum += ambient_out; +		} break;  	} -#endif //USE_LIGHTMAP or VCT  } -#ifdef 
USE_FORWARD_GI  //standard voxel cone trace  vec4 voxel_cone_trace(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { @@ -1375,42 +1378,6 @@ vec4 voxel_cone_trace(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction,  	return color;  } -#ifndef GI_PROBE_HIGH_QUALITY -//faster version for 45 degrees - -#ifdef GI_PROBE_USE_ANISOTROPY - -vec4 voxel_cone_trace_anisotropic_45_degrees(texture3D probe, texture3D aniso_pos, texture3D aniso_neg, vec3 normal, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { -	float dist = p_bias; -	vec4 color = vec4(0.0); -	float radius = max(0.5, tan_half_angle * dist); -	float lod_level = log2(radius * 2.0); - -	while (dist < max_distance && color.a < 0.95) { -		vec3 uvw_pos = (pos + dist * direction) * cell_size; -		//check if outside, then break -		if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + radius * cell_size)))) { -			break; -		} - -		vec4 scolor = textureLod(sampler3D(probe, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, lod_level); -		vec3 aniso_neg = textureLod(sampler3D(aniso_neg, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, lod_level).rgb; -		vec3 aniso_pos = textureLod(sampler3D(aniso_pos, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, lod_level).rgb; - -		scolor.rgb *= dot(max(vec3(0.0), (normal * aniso_pos)), vec3(1.0)) + dot(max(vec3(0.0), (-normal * aniso_neg)), vec3(1.0)); -		lod_level += 1.0; - -		float a = (1.0 - color.a); -		scolor *= a; -		color += scolor; -		dist += radius; -		radius = max(0.5, tan_half_angle * dist); -	} - -	return color; -} -#else -  vec4 voxel_cone_trace_45_degrees(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) {  	float dist = p_bias;  	vec4 color = vec4(0.0); @@ -1437,41 +1404,6 @@ vec4 voxel_cone_trace_45_degrees(texture3D probe, vec3 cell_size, vec3 
pos, vec3  	return color;  } -#endif - -#elif defined(GI_PROBE_USE_ANISOTROPY) - -//standard voxel cone trace -vec4 voxel_cone_trace_anisotropic(texture3D probe, texture3D aniso_pos, texture3D aniso_neg, vec3 normal, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { -	float dist = p_bias; -	vec4 color = vec4(0.0); - -	while (dist < max_distance && color.a < 0.95) { -		float diameter = max(1.0, 2.0 * tan_half_angle * dist); -		vec3 uvw_pos = (pos + dist * direction) * cell_size; -		float half_diameter = diameter * 0.5; -		//check if outside, then break -		if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + half_diameter * cell_size)))) { -			break; -		} -		float log2_diameter = log2(diameter); -		vec4 scolor = textureLod(sampler3D(probe, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, log2_diameter); -		vec3 aniso_neg = textureLod(sampler3D(aniso_neg, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, log2_diameter).rgb; -		vec3 aniso_pos = textureLod(sampler3D(aniso_pos, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uvw_pos, log2_diameter).rgb; - -		scolor.rgb *= dot(max(vec3(0.0), (normal * aniso_pos)), vec3(1.0)) + dot(max(vec3(0.0), (-normal * aniso_neg)), vec3(1.0)); - -		float a = (1.0 - color.a); -		scolor *= a; -		color += scolor; -		dist += half_diameter; -	} - -	return color; -} - -#endif -  void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3 normal_xform, float roughness, vec3 ambient, vec3 environment, inout vec4 out_spec, inout vec4 out_diff) {  	position = (gi_probes.data[index].xform * vec4(position, 1.0)).xyz;  	ref_vec = normalize((gi_probes.data[index].xform * vec4(ref_vec, 0.0)).xyz); @@ -1493,31 +1425,6 @@ void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3  	//radiance -#ifdef GI_PROBE_HIGH_QUALITY - -#define MAX_CONE_DIRS 6 -	vec3 cone_dirs[MAX_CONE_DIRS] = vec3[]( -			vec3(0.0, 0.0, 1.0), -			
vec3(0.866025, 0.0, 0.5), -			vec3(0.267617, 0.823639, 0.5), -			vec3(-0.700629, 0.509037, 0.5), -			vec3(-0.700629, -0.509037, 0.5), -			vec3(0.267617, -0.823639, 0.5)); - -	float cone_weights[MAX_CONE_DIRS] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); -	float cone_angle_tan = 0.577; - -#elif defined(GI_PROBE_LOW_QUALITY) - -#define MAX_CONE_DIRS 1 - -	vec3 cone_dirs[MAX_CONE_DIRS] = vec3[]( -			vec3(0.0, 0.0, 1.0)); - -	float cone_weights[MAX_CONE_DIRS] = float[](1.0); -	float cone_angle_tan = 4; //~76 degrees -#else // MEDIUM QUALITY -  #define MAX_CONE_DIRS 4  	vec3 cone_dirs[MAX_CONE_DIRS] = vec3[]( @@ -1529,31 +1436,13 @@ void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3  	float cone_weights[MAX_CONE_DIRS] = float[](0.25, 0.25, 0.25, 0.25);  	float cone_angle_tan = 0.98269; -#endif  	vec3 light = vec3(0.0);  	for (int i = 0; i < MAX_CONE_DIRS; i++) {  		vec3 dir = normalize((gi_probes.data[index].xform * vec4(normal_xform * cone_dirs[i], 0.0)).xyz); -#if defined(GI_PROBE_HIGH_QUALITY) || defined(GI_PROBE_LOW_QUALITY) - -#ifdef GI_PROBE_USE_ANISOTROPY -		vec4 cone_light = voxel_cone_trace_anisotropic(gi_probe_textures[gi_probes.data[index].texture_slot], gi_probe_textures[gi_probes.data[index].texture_slot + 1], gi_probe_textures[gi_probes.data[index].texture_slot + 2], normalize(mix(dir, normal, gi_probes.data[index].anisotropy_strength)), cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); -#else - -		vec4 cone_light = voxel_cone_trace(gi_probe_textures[gi_probes.data[index].texture_slot], cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); - -#endif // GI_PROBE_USE_ANISOTROPY - -#else - -#ifdef GI_PROBE_USE_ANISOTROPY -		vec4 cone_light = voxel_cone_trace_anisotropic_45_degrees(gi_probe_textures[gi_probes.data[index].texture_slot], gi_probe_textures[gi_probes.data[index].texture_slot + 1], gi_probe_textures[gi_probes.data[index].texture_slot + 2], 
normalize(mix(dir, normal, gi_probes.data[index].anisotropy_strength)), cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); -#else -		vec4 cone_light = voxel_cone_trace_45_degrees(gi_probe_textures[gi_probes.data[index].texture_slot], cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); -#endif // GI_PROBE_USE_ANISOTROPY +		vec4 cone_light = voxel_cone_trace_45_degrees(gi_probe_textures[index], cell_size, position, dir, cone_angle_tan, max_distance, gi_probes.data[index].bias); -#endif  		if (gi_probes.data[index].blend_ambient) {  			cone_light.rgb = mix(ambient, cone_light.rgb, min(1.0, cone_light.a / 0.95));  		} @@ -1562,33 +1451,10 @@ void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3  	}  	light *= gi_probes.data[index].dynamic_range; - -	if (gi_probes.data[index].ambient_occlusion > 0.001) { -		float size = 1.0 + gi_probes.data[index].ambient_occlusion_size * 7.0; - -		float taps, blend; -		blend = modf(size, taps); -		float ao = 0.0; -		for (float i = 1.0; i <= taps; i++) { -			vec3 ofs = (position + normal * (i * 0.5 + 1.0)) * cell_size; -			ao += textureLod(sampler3D(gi_probe_textures[gi_probes.data[index].texture_slot], material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ofs, i - 1.0).a * i; -		} - -		if (blend > 0.001) { -			vec3 ofs = (position + normal * ((taps + 1.0) * 0.5 + 1.0)) * cell_size; -			ao += textureLod(sampler3D(gi_probe_textures[gi_probes.data[index].texture_slot], material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ofs, taps).a * (taps + 1.0) * blend; -		} - -		ao = 1.0 - min(1.0, ao); - -		light = mix(scene_data.ao_color.rgb, light, mix(1.0, ao, gi_probes.data[index].ambient_occlusion)); -	} -  	out_diff += vec4(light * blend, blend);  	//irradiance -#ifndef GI_PROBE_LOW_QUALITY -	vec4 irr_light = voxel_cone_trace(gi_probe_textures[gi_probes.data[index].texture_slot], cell_size, position, ref_vec, tan(roughness * 0.5 * M_PI * 0.99), 
max_distance, gi_probes.data[index].bias); +	vec4 irr_light = voxel_cone_trace(gi_probe_textures[index], cell_size, position, ref_vec, tan(roughness * 0.5 * M_PI * 0.99), max_distance, gi_probes.data[index].bias);  	if (gi_probes.data[index].blend_ambient) {  		irr_light.rgb = mix(environment, irr_light.rgb, min(1.0, irr_light.a / 0.95));  	} @@ -1596,10 +1462,142 @@ void gi_probe_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3  	//irr_light=vec3(0.0);  	out_spec += vec4(irr_light.rgb * blend, blend); -#endif  } -#endif //USE_VOXEL_CONE_TRACING +#endif //USE_FORWARD_GI + +vec2 octahedron_wrap(vec2 v) { +	vec2 signVal; +	signVal.x = v.x >= 0.0 ? 1.0 : -1.0; +	signVal.y = v.y >= 0.0 ? 1.0 : -1.0; +	return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { +	// https://twitter.com/Stubbesaurus/status/937994790553227264 +	n /= (abs(n.x) + abs(n.y) + abs(n.z)); +	n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); +	n.xy = n.xy * 0.5 + 0.5; +	return n.xy; +} + +void sdfgi_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal, vec3 cam_specular_normal, bool use_specular, float roughness, out vec3 diffuse_light, out vec3 specular_light, out float blend) { +	cascade_pos += cam_normal * sdfgi.normal_bias; + +	vec3 base_pos = floor(cascade_pos); +	//cascade_pos += mix(vec3(0.0),vec3(0.01),lessThan(abs(cascade_pos-base_pos),vec3(0.01))) * cam_normal; +	ivec3 probe_base_pos = ivec3(base_pos); + +	vec4 diffuse_accum = vec4(0.0); +	vec3 specular_accum; + +	ivec3 tex_pos = ivec3(probe_base_pos.xy, int(cascade)); +	tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; +	tex_pos.xy = tex_pos.xy * (SDFGI_OCT_SIZE + 2) + ivec2(1); + +	vec3 diffuse_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + +	vec3 specular_posf; + +	if (use_specular) { +		specular_accum = vec3(0.0); +		specular_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_specular_normal) * 
float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; +	} + +	vec4 light_accum = vec4(0.0); +	float weight_accum = 0.0; + +	for (uint j = 0; j < 8; j++) { +		ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); +		ivec3 probe_posi = probe_base_pos; +		probe_posi += offset; + +		// Compute weight + +		vec3 probe_pos = vec3(probe_posi); +		vec3 probe_to_pos = cascade_pos - probe_pos; +		vec3 probe_dir = normalize(-probe_to_pos); + +		vec3 trilinear = vec3(1.0) - abs(probe_to_pos); +		float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(cam_normal, probe_dir)); + +		// Compute lightprobe occlusion + +		if (sdfgi.use_occlusion) { +			ivec3 occ_indexv = abs((sdfgi.cascades[cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); +			vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + +			vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; +			occ_pos.z += float(cascade); +			if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures +				occ_pos.x += 1.0; +			} + +			occ_pos *= sdfgi.occlusion_renormalize; +			float occlusion = dot(textureLod(sampler3D(sdfgi_occlusion_cascades, material_samplers[SAMPLER_LINEAR_CLAMP]), occ_pos, 0.0), occ_mask); + +			weight *= max(occlusion, 0.01); +		} + +		// Compute lightprobe texture position + +		vec3 diffuse; +		vec3 pos_uvw = diffuse_posf; +		pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; +		pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; +		diffuse = textureLod(sampler2DArray(sdfgi_lightprobe_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), pos_uvw, 0.0).rgb; + +		diffuse_accum += vec4(diffuse * weight, weight); + +		if (use_specular) { +			vec3 specular = vec3(0.0); +			vec3 pos_uvw = specular_posf; +			pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; 
+			pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; +			if (roughness < 0.99) { +				specular = textureLod(sampler2DArray(sdfgi_lightprobe_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), pos_uvw + vec3(0, 0, float(sdfgi.max_cascades)), 0.0).rgb; +			} +			if (roughness > 0.5) { +				specular = mix(specular, textureLod(sampler2DArray(sdfgi_lightprobe_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), pos_uvw, 0.0).rgb, (roughness - 0.5) * 2.0); +			} + +			specular_accum += specular * weight; +		} +	} + +	if (diffuse_accum.a > 0.0) { +		diffuse_accum.rgb /= diffuse_accum.a; +	} + +	diffuse_light = diffuse_accum.rgb; + +	if (use_specular) { +		if (diffuse_accum.a > 0.0) { +			specular_accum /= diffuse_accum.a; +		} + +		specular_light = specular_accum; +	} + +	{ +		//process blend +		float blend_from = (float(sdfgi.probe_axis_size - 1) / 2.0) - 2.5; +		float blend_to = blend_from + 2.0; + +		vec3 inner_pos = cam_pos * sdfgi.cascades[cascade].to_probe; + +		float len = length(inner_pos); + +		inner_pos = abs(normalize(inner_pos)); +		len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z)); + +		if (len >= blend_from) { +			blend = smoothstep(blend_from, blend_to, len); +		} else { +			blend = 0.0; +		} +	} +}  #endif //!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) @@ -1812,6 +1810,15 @@ FRAGMENT_SHADER_CODE  #endif //not render depth  	/////////////////////// LIGHTING ////////////////////////////// +	if (scene_data.roughness_limiter_enabled) { +		//http://www.jp.square-enix.com/tech/library/pdf/ImprovedGeometricSpecularAA.pdf +		float roughness2 = roughness * roughness; +		vec3 dndu = dFdx(normal), dndv = dFdx(normal); +		float variance = scene_data.roughness_limiter_amount * (dot(dndu, dndu) + dot(dndv, dndv)); +		float kernelRoughness2 = min(2.0 * variance, scene_data.roughness_limiter_limit); //limit effect +		float filteredRoughness2 = min(1.0, roughness2 + kernelRoughness2); +		roughness = sqrt(filteredRoughness2); +	}  	//apply energy 
conservation  	vec3 specular_light = vec3(0.0, 0.0, 0.0); @@ -1820,11 +1827,6 @@ FRAGMENT_SHADER_CODE  #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) -	if (scene_data.roughness_limiter_enabled) { -		float limit = texelFetch(sampler2D(roughness_buffer, material_samplers[SAMPLER_NEAREST_CLAMP]), ivec2(gl_FragCoord.xy), 0).r; -		roughness = max(roughness, limit); -	} -  	if (scene_data.use_reflection_cubemap) {  		vec3 ref_vec = reflect(-view, normal);  		ref_vec = scene_data.radiance_inverse_xform * ref_vec; @@ -1871,7 +1873,6 @@ FRAGMENT_SHADER_CODE  #endif  #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) -	//gi probes  #ifdef USE_LIGHTMAP @@ -1928,10 +1929,80 @@ FRAGMENT_SHADER_CODE  			ambient_light += textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw, 0.0).rgb;  		}  	} -#endif -	//lightmap capture +#elif defined(USE_FORWARD_GI) + +	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + +		//make vertex orientation the world one, but still align to camera +		vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; +		vec3 cam_normal = mat3(scene_data.camera_matrix) * normal; +		vec3 cam_reflection = mat3(scene_data.camera_matrix) * reflect(-view, normal); + +		//apply y-mult +		cam_pos.y *= sdfgi.y_mult; +		cam_normal.y *= sdfgi.y_mult; +		cam_normal = normalize(cam_normal); +		cam_reflection.y *= sdfgi.y_mult; +		cam_normal = normalize(cam_normal); +		cam_reflection = normalize(cam_reflection); + +		vec4 light_accum = vec4(0.0); +		float weight_accum = 0.0; + +		vec4 light_blend_accum = vec4(0.0); +		float weight_blend_accum = 0.0; + +		float blend = -1.0; + +		// helper constants, compute once + +		uint cascade = 0xFFFFFFFF; +		vec3 cascade_pos; +		vec3 cascade_normal; + +		for (uint i = 0; i < sdfgi.max_cascades; i++) { +			cascade_pos = (cam_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + +			if (any(lessThan(cascade_pos, 
vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { +				continue; //skip cascade +			} + +			cascade = i; +			break; +		} + +		if (cascade < SDFGI_MAX_CASCADES) { +			bool use_specular = true; +			float blend; +			vec3 diffuse, specular; +			sdfgi_process(cascade, cascade_pos, cam_pos, cam_normal, cam_reflection, use_specular, roughness, diffuse, specular, blend); + +			if (blend > 0.0) { +				//blend +				if (cascade == sdfgi.max_cascades - 1) { +					diffuse = mix(diffuse, ambient_light, blend); +					if (use_specular) { +						specular = mix(specular, specular_light, blend); +					} +				} else { +					vec3 diffuse2, specular2; +					float blend2; +					cascade_pos = (cam_pos - sdfgi.cascades[cascade + 1].position) * sdfgi.cascades[cascade + 1].to_probe; +					sdfgi_process(cascade + 1, cascade_pos, cam_pos, cam_normal, cam_reflection, use_specular, roughness, diffuse2, specular2, blend2); +					diffuse = mix(diffuse, diffuse2, blend); +					if (use_specular) { +						specular = mix(specular, specular2, blend); +					} +				} +			} + +			ambient_light = diffuse; +			if (use_specular) { +				specular_light = specular; +			} +		} +	} -#ifdef USE_VOXEL_CONE_TRACING  	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes  		uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; @@ -1963,6 +2034,56 @@ FRAGMENT_SHADER_CODE  		specular_light = spec_accum.rgb;  		ambient_light = amb_accum.rgb;  	} +#else +	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + +		ivec2 coord; + +		if (scene_data.gi_upscale_for_msaa) { +			/* +			//find the closest depth to upscale from, based on neighbours +			ivec2 base_coord = ivec2(gl_FragCoord.xy); +			float z_dist = gl_FragCoord.z; +			ivec2 closest_coord = base_coord; +			float closest_z_dist = abs(texelFetch(sampler2D(depth_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord,0).r-z_dist); + +			
for(int i=0;i<4;i++) { +				const ivec2 neighbours[4]=ivec2[](ivec2(-1,0),ivec2(1,0),ivec2(0,-1),ivec2(0,1)); +				ivec2 neighbour_coord = base_coord + neighbours[i]; +				float neighbour_z_dist = abs(texelFetch(sampler2D(depth_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord,0).r-z_dist); +				if (neighbour_z_dist < closest_z_dist) { +					closest_z_dist = neighbour_z_dist; +					closest_coord = neighbour_coord; +				} +			} + +*/ +			ivec2 base_coord = ivec2(gl_FragCoord.xy); +			ivec2 closest_coord = base_coord; +			float closest_ang = dot(normal, texelFetch(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0).xyz * 2.0 - 1.0); + +			for (int i = 0; i < 4; i++) { +				const ivec2 neighbours[4] = ivec2[](ivec2(-1, 0), ivec2(1, 0), ivec2(0, -1), ivec2(0, 1)); +				ivec2 neighbour_coord = base_coord + neighbours[i]; +				float neighbour_ang = dot(normal, texelFetch(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0).xyz * 2.0 - 1.0); +				if (neighbour_ang > closest_ang) { +					closest_ang = neighbour_ang; +					closest_coord = neighbour_coord; +				} +			} + +			coord = closest_coord; + +		} else { +			coord = ivec2(gl_FragCoord.xy); +		} + +		vec4 buffer_ambient = texelFetch(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0); +		vec4 buffer_reflection = texelFetch(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0); + +		ambient_light = mix(ambient_light, buffer_ambient.rgb, buffer_ambient.a); +		specular_light = mix(specular_light, buffer_reflection.rgb, buffer_reflection.a); +	}  #endif  	{ // process reflections @@ -2376,6 +2497,97 @@ FRAGMENT_SHADER_CODE  #ifdef MODE_RENDER_DEPTH +#ifdef MODE_RENDER_SDF + +	{ +		vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz; +		ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size)); + +		uint albedo16 = 0x1; //solid 
flag +		albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11; +		albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6; +		albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1; + +		imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16)); + +		uint facing_bits = 0; +		const vec3 aniso_dir[6] = vec3[]( +				vec3(1, 0, 0), +				vec3(0, 1, 0), +				vec3(0, 0, 1), +				vec3(-1, 0, 0), +				vec3(0, -1, 0), +				vec3(0, 0, -1)); + +		vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp); + +		float closest_dist = -1e20; + +		for (uint i = 0; i < 6; i++) { +			float d = dot(cam_normal, aniso_dir[i]); +			if (d > closest_dist) { +				closest_dist = d; +				facing_bits = (1 << i); +			} +		} + +		imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits + +		if (length(emission) > 0.001) { +			float lumas[6]; +			vec3 light_total = vec3(0); + +			for (int i = 0; i < 6; i++) { +				float strength = max(0.0, dot(cam_normal, aniso_dir[i])); +				vec3 light = emission * strength; +				light_total += light; +				lumas[i] = max(light.r, max(light.g, light.b)); +			} + +			float luma_total = max(light_total.r, max(light_total.g, light_total.b)); + +			uint light_aniso = 0; + +			for (int i = 0; i < 6; i++) { +				light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); +			} + +			//compress to RGBE9995 to save space + +			const float pow2to9 = 512.0f; +			const float B = 15.0f; +			const float N = 9.0f; +			const float LN2 = 0.6931471805599453094172321215; + +			float cRed = clamp(light_total.r, 0.0, 65408.0); +			float cGreen = clamp(light_total.g, 0.0, 65408.0); +			float cBlue = clamp(light_total.b, 0.0, 65408.0); + +			float cMax = max(cRed, max(cGreen, cBlue)); + +			float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + +			float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + +			float exps = expp + 1.0f; + +			if (0.0 <= sMax && sMax < pow2to9) { +				exps = expp; +			} + +			float sRed = 
floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); +			float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); +			float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); +			//store as 8985 to have 2 extra neighbour bits +			uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); + +			imageStore(emission_grid, grid_pos, uvec4(light_rgbe)); +			imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso)); +		} +	} + +#endif +  #ifdef MODE_RENDER_MATERIAL  	albedo_output_buffer.rgb = albedo; @@ -2398,11 +2610,21 @@ FRAGMENT_SHADER_CODE  	emission_output_buffer.a = 0.0;  #endif -#ifdef MODE_RENDER_NORMAL -	normal_output_buffer = vec4(normal * 0.5 + 0.5, 0.0); -#ifdef MODE_RENDER_ROUGHNESS -	roughness_output_buffer = roughness; -#endif //MODE_RENDER_ROUGHNESS +#ifdef MODE_RENDER_NORMAL_ROUGHNESS +	normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); + +#ifdef MODE_RENDER_GIPROBE +	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes +		uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; +		uint index2 = instances.data[instance_index].gi_offset >> 16; +		giprobe_buffer.x = index1 & 0xFF; +		giprobe_buffer.y = index2 & 0xFF; +	} else { +		giprobe_buffer.x = 0xFF; +		giprobe_buffer.y = 0xFF; +	} +#endif +  #endif //MODE_RENDER_NORMAL  //nothing happens, so a tree-ssa optimizer will result in no fragment shader :) @@ -2455,7 +2677,6 @@ FRAGMENT_SHADER_CODE  #endif  	diffuse_buffer = vec4(emission + diffuse_light + ambient_light, sss_strength);  	specular_buffer = vec4(specular_light, metallic); -  #endif  #else //MODE_MULTIPLE_RENDER_TARGETS diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl index 1cac12406a..1244599097 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl +++ 
b/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl @@ -1,6 +1,8 @@  #define M_PI 3.14159265359  #define ROUGHNESS_MAX_LOD 5 +#define MAX_GI_PROBES 8 +  layout(push_constant, binding = 0, std430) uniform DrawCall {  	uint instance_index;  	uint pad; //16 bits minimum size @@ -27,6 +29,8 @@ layout(set = 0, binding = 1) uniform sampler material_samplers[12];  layout(set = 0, binding = 2) uniform sampler shadow_sampler; +#define SDFGI_MAX_CASCADES 8 +  layout(set = 0, binding = 3, std140) uniform SceneData {  	mat4 projection_matrix;  	mat4 inv_projection_matrix; @@ -76,11 +80,19 @@ layout(set = 0, binding = 3, std140) uniform SceneData {  	float ssao_ao_affect;  	bool roughness_limiter_enabled; +	float roughness_limiter_amount; +	float roughness_limiter_limit; +	uvec2 roughness_limiter_pad; +  	vec4 ao_color; + +	mat4 sdf_to_bounds; + +	ivec3 sdf_offset;  	bool material_uv2_mode; -	uint pad_material0; -	uint pad_material1; -	uint pad_material2; + +	ivec3 sdf_size; +	bool gi_upscale_for_msaa;  #if 0  	vec4 ambient_light_color; @@ -120,6 +132,8 @@ layout(set = 0, binding = 3, std140) uniform SceneData {  scene_data; +#define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) +#define INSTANCE_FLAGS_USE_SDFGI (1 << 7)  #define INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE (1 << 8)  #define INSTANCE_FLAGS_USE_LIGHTMAP (1 << 9)  #define INSTANCE_FLAGS_USE_SH_LIGHTMAP (1 << 10) @@ -175,13 +189,18 @@ layout(set = 0, binding = 5, std430) restrict readonly buffer Lights {  }  lights; +#define REFLECTION_AMBIENT_DISABLED 0 +#define REFLECTION_AMBIENT_ENVIRONMENT 1 +#define REFLECTION_AMBIENT_COLOR 2 +  struct ReflectionData {  	vec3 box_extents;  	float index;  	vec3 box_offset;  	uint mask;  	vec4 params; // intensity, 0, interior , boxproject -	vec4 ambient; // ambient color, energy +	vec3 ambient; // ambient color +	uint ambient_mode;  	mat4 local_matrix; // up to here for spot and omni, rest is for directional  	// notes: for ambientblend, use distance to edge to blend between 
already existing global environment  }; @@ -229,29 +248,6 @@ layout(set = 0, binding = 7, std140) uniform DirectionalLights {  }  directional_lights; -struct GIProbeData { -	mat4 xform; -	vec3 bounds; -	float dynamic_range; - -	float bias; -	float normal_bias; -	bool blend_ambient; -	uint texture_slot; - -	float anisotropy_strength; -	float ambient_occlusion; -	float ambient_occlusion_size; -	uint pad2; -}; - -layout(set = 0, binding = 8, std140) uniform GIProbes { -	GIProbeData data[MAX_GI_PROBES]; -} -gi_probes; - -layout(set = 0, binding = 9) uniform texture3D gi_probe_textures[MAX_GI_PROBE_TEXTURES]; -  #define LIGHTMAP_FLAG_USE_DIRECTION 1  #define LIGHTMAP_FLAG_USE_SPECULAR_DIRECTION 2 @@ -319,6 +315,41 @@ layout(set = 0, binding = 19, std430) restrict readonly buffer GlobalVariableDat  }  global_variables; +struct SDFGIProbeCascadeData { +	vec3 position; +	float to_probe; +	ivec3 probe_world_offset; +	float to_cell; // 1/bounds * grid_size +}; + +layout(set = 0, binding = 20, std140) uniform SDFGI { +	vec3 grid_size; +	uint max_cascades; + +	bool use_occlusion; +	int probe_axis_size; +	float probe_to_uvw; +	float normal_bias; + +	vec3 lightprobe_tex_pixel_size; +	float energy; + +	vec3 lightprobe_uv_offset; +	float y_mult; + +	vec3 occlusion_clamp; +	uint pad3; + +	vec3 occlusion_renormalize; +	uint pad4; + +	vec3 cascade_probe_size; +	uint pad5; + +	SDFGIProbeCascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; +  // decal atlas  /* Set 1, Radiance */ @@ -339,13 +370,57 @@ layout(set = 2, binding = 0) uniform textureCubeArray reflection_atlas;  layout(set = 2, binding = 1) uniform texture2D shadow_atlas; +layout(set = 2, binding = 2) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +  /* Set 3, Render Buffers */ +#ifdef MODE_RENDER_SDF + +layout(r16ui, set = 3, binding = 0) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 3, binding = 1) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 3, binding = 2) 
uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 3, binding = 3) uniform restrict uimage3D geom_facing_grid; + +//still need to be present for shaders that use it, so remap them to something +#define depth_buffer shadow_atlas +#define color_buffer shadow_atlas +#define normal_roughness_buffer shadow_atlas + +#else +  layout(set = 3, binding = 0) uniform texture2D depth_buffer;  layout(set = 3, binding = 1) uniform texture2D color_buffer; -layout(set = 3, binding = 2) uniform texture2D normal_buffer; -layout(set = 3, binding = 3) uniform texture2D roughness_buffer; +layout(set = 3, binding = 2) uniform texture2D normal_roughness_buffer;  layout(set = 3, binding = 4) uniform texture2D ao_buffer; +layout(set = 3, binding = 5) uniform texture2D ambient_buffer; +layout(set = 3, binding = 6) uniform texture2D reflection_buffer; + +layout(set = 3, binding = 7) uniform texture2DArray sdfgi_lightprobe_texture; + +layout(set = 3, binding = 8) uniform texture3D sdfgi_occlusion_cascades; + +struct GIProbeData { +	mat4 xform; +	vec3 bounds; +	float dynamic_range; + +	float bias; +	float normal_bias; +	bool blend_ambient; +	uint texture_slot; + +	float anisotropy_strength; +	float ambient_occlusion; +	float ambient_occlusion_size; +	uint pad2; +}; + +layout(set = 3, binding = 9, std140) uniform GIProbes { +	GIProbeData data[MAX_GI_PROBES]; +} +gi_probes; + +#endif  /* Set 4 Skeleton & Instancing (Multimesh) */ diff --git a/servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl index 084f28d932..a8ee33a664 100644 --- a/servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl +++ b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl @@ -12,11 +12,8 @@ layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D ssr_ima  #ifdef MODE_ROUGH  layout(r8, set = 1, binding = 1) uniform restrict writeonly image2D blur_radius_image;  
#endif -layout(rgba8, set = 2, binding = 0) uniform restrict readonly image2D source_normal; +layout(rgba8, set = 2, binding = 0) uniform restrict readonly image2D source_normal_roughness;  layout(set = 3, binding = 0) uniform sampler2D source_metallic; -#ifdef MODE_ROUGH -layout(set = 3, binding = 1) uniform sampler2D source_roughness; -#endif  layout(push_constant, binding = 2, std430) uniform Params {  	vec4 proj_info; @@ -75,7 +72,8 @@ void main() {  	// World space point being shaded  	vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth); -	vec3 normal = imageLoad(source_normal, ssC).xyz * 2.0 - 1.0; +	vec4 normal_roughness = imageLoad(source_normal_roughness, ssC); +	vec3 normal = normal_roughness.xyz * 2.0 - 1.0;  	normal = normalize(normal);  	normal.y = -normal.y; //because this code reads flipped @@ -208,7 +206,7 @@ void main() {  		// if roughness is enabled, do screen space cone tracing  		float blur_radius = 0.0; -		float roughness = texelFetch(source_roughness, ssC << 1, 0).r; +		float roughness = normal_roughness.w;  		if (roughness > 0.001) {  			float cone_angle = min(roughness, 0.999) * M_PI * 0.5; diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_debug.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_debug.glsl new file mode 100644 index 0000000000..813ea29fa1 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_debug.glsl @@ -0,0 +1,275 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; +layout(set = 0, binding = 5) uniform texture3D occlusion_texture; + +layout(set = 0, binding = 8) uniform 
sampler linear_sampler; + +struct CascadeData { +	vec3 offset; //offset of (0,0,0) in world coordinates +	float to_cell; // 1/bounds * grid_size +	ivec3 probe_world_offset; +	uint pad; +}; + +layout(set = 0, binding = 9, std140) uniform Cascades { +	CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D screen_buffer; + +layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; + +layout(push_constant, binding = 0, std430) uniform Params { +	vec3 grid_size; +	uint max_cascades; + +	ivec2 screen_size; +	bool use_occlusion; +	float y_mult; + +	vec3 cam_extent; +	int probe_axis_size; + +	mat4 cam_transform; +} +params; + +vec3 linear_to_srgb(vec3 color) { +	//if going to srgb, clamp from 0 to 1. +	color = clamp(color, vec3(0.0), vec3(1.0)); +	const vec3 a = vec3(0.055f); +	return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f))); +} + +vec2 octahedron_wrap(vec2 v) { +	vec2 signVal; +	signVal.x = v.x >= 0.0 ? 1.0 : -1.0; +	signVal.y = v.y >= 0.0 ? 1.0 : -1.0; +	return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { +	// https://twitter.com/Stubbesaurus/status/937994790553227264 +	n /= (abs(n.x) + abs(n.y) + abs(n.z)); +	n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); +	n.xy = n.xy * 0.5 + 0.5; +	return n.xy; +} + +void main() { +	// Pixel being shaded +	ivec2 screen_pos = ivec2(gl_GlobalInvocationID.xy); +	if (any(greaterThanEqual(screen_pos, params.screen_size))) { //too large, do nothing +		return; +	} + +	vec3 ray_pos; +	vec3 ray_dir; +	{ +		ray_pos = params.cam_transform[3].xyz; + +		ray_dir.xy = params.cam_extent.xy * ((vec2(screen_pos) / vec2(params.screen_size)) * 2.0 - 1.0); +		ray_dir.z = params.cam_extent.z; + +		ray_dir = normalize(mat3(params.cam_transform) * ray_dir); +	} + +	ray_pos.y *= params.y_mult; +	ray_dir.y *= params.y_mult; +	ray_dir = normalize(ray_dir); + +	vec3 pos_to_uvw = 1.0 / params.grid_size; + +	vec3 light = vec3(0.0); +	float blend = 0.0; + +#if 1 +	vec3 inv_dir = 1.0 / ray_dir; + +	float rough = 0.5; +	bool hit = false; + +	for (uint i = 0; i < params.max_cascades; i++) { +		//convert to local bounds +		vec3 pos = ray_pos - cascades.data[i].offset; +		pos *= cascades.data[i].to_cell; + +		// Should never happen for debug, since we start mostly at the bounds center, +		// but add anyway. 
+		//if (any(lessThan(pos,vec3(0.0))) || any(greaterThanEqual(pos,params.grid_size))) { +		//	continue; //already past bounds for this cascade, goto next +		//} + +		//find maximum advance distance (until reaching bounds) +		vec3 t0 = -pos * inv_dir; +		vec3 t1 = (params.grid_size - pos) * inv_dir; +		vec3 tmax = max(t0, t1); +		float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + +		float advance = 0.0; +		vec3 uvw; +		hit = false; + +		while (advance < max_advance) { +			//read how much to advance from SDF +			uvw = (pos + ray_dir * advance) * pos_to_uvw; + +			float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), uvw).r * 255.0 - 1.7; + +			if (distance < 0.001) { +				//consider hit +				hit = true; +				break; +			} + +			advance += distance; +		} + +		if (!hit) { +			pos += ray_dir * min(advance, max_advance); +			pos /= cascades.data[i].to_cell; +			pos += cascades.data[i].offset; +			ray_pos = pos; +			continue; +		} + +		//compute albedo, emission and normal at hit point + +		const float EPSILON = 0.001; +		vec3 hit_normal = normalize(vec3( +				texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, +				texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, +				texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + +		vec3 hit_light = texture(sampler3D(light_cascades[i], linear_sampler), uvw).rgb; +		vec4 aniso0 = texture(sampler3D(aniso0_cascades[i], linear_sampler), uvw); +		vec3 hit_aniso0 = aniso0.rgb; +		vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[i], linear_sampler), uvw).rg); + +		hit_light *= (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + 
dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); + +		if (blend > 0.0) { +			light = mix(light, hit_light, blend); +			blend = 0.0; +		} else { +			light = hit_light; + +			//process blend +			float blend_from = (float(params.probe_axis_size - 1) / 2.0) - 2.5; +			float blend_to = blend_from + 2.0; + +			vec3 cam_pos = params.cam_transform[3].xyz - cascades.data[i].offset; +			cam_pos *= cascades.data[i].to_cell; + +			pos += ray_dir * min(advance, max_advance); +			vec3 inner_pos = pos - cam_pos; + +			inner_pos = inner_pos * float(params.probe_axis_size - 1) / params.grid_size.x; + +			float len = length(inner_pos); + +			inner_pos = abs(normalize(inner_pos)); +			len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z)); + +			if (len >= blend_from) { +				blend = smoothstep(blend_from, blend_to, len); + +				pos /= cascades.data[i].to_cell; +				pos += cascades.data[i].offset; +				ray_pos = pos; +				hit = false; //continue trace for blend + +				continue; +			} +		} + +		break; +	} + +	light = mix(light, vec3(0.0), blend); + +#else + +	vec3 inv_dir = 1.0 / ray_dir; + +	bool hit = false; +	vec4 light_accum = vec4(0.0); + +	float blend_size = (params.grid_size.x / float(params.probe_axis_size - 1)) * 0.5; + +	float radius_sizes[MAX_CASCADES]; +	for (uint i = 0; i < params.max_cascades; i++) { +		radius_sizes[i] = (1.0 / cascades.data[i].to_cell) * (params.grid_size.x * 0.5 - blend_size); +	} + +	float max_distance = radius_sizes[params.max_cascades - 1]; +	float advance = 0; +	while (advance < max_distance) { +		for (uint i = 0; i < params.max_cascades; i++) { +			if (advance < radius_sizes[i]) { +				vec3 pos = (ray_pos + ray_dir * advance) - cascades.data[i].offset; +				pos *= cascades.data[i].to_cell * pos_to_uvw; + +				float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), pos).r * 255.0 - 1.0; + +				vec4 hit_light = vec4(0.0); +				if (distance < 1.0) { +					hit_light.a = max(0.0, 1.0 - distance); +					hit_light.rgb = 
texture(sampler3D(light_cascades[i], linear_sampler), pos).rgb; +					hit_light.rgb *= hit_light.a; +				} + +				distance /= cascades.data[i].to_cell; + +				if (i < (params.max_cascades - 1)) { +					pos = (ray_pos + ray_dir * advance) - cascades.data[i + 1].offset; +					pos *= cascades.data[i + 1].to_cell * pos_to_uvw; + +					float distance2 = texture(sampler3D(sdf_cascades[i + 1], linear_sampler), pos).r * 255.0 - 1.0; + +					vec4 hit_light2 = vec4(0.0); +					if (distance2 < 1.0) { +						hit_light2.a = max(0.0, 1.0 - distance2); +						hit_light2.rgb = texture(sampler3D(light_cascades[i + 1], linear_sampler), pos).rgb; +						hit_light2.rgb *= hit_light2.a; +					} + +					float prev_radius = i == 0 ? 0.0 : radius_sizes[i - 1]; +					float blend = (advance - prev_radius) / (radius_sizes[i] - prev_radius); + +					distance2 /= cascades.data[i + 1].to_cell; + +					hit_light = mix(hit_light, hit_light2, blend); +					distance = mix(distance, distance2, blend); +				} + +				light_accum += hit_light; +				advance += distance; +				break; +			} +		} + +		if (light_accum.a > 0.98) { +			break; +		} +	} + +	light = light_accum.rgb / light_accum.a; + +#endif + +	imageStore(screen_buffer, screen_pos, vec4(linear_to_srgb(light), 1.0)); +} diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_debug_probes.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_debug_probes.glsl new file mode 100644 index 0000000000..08da283dad --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_debug_probes.glsl @@ -0,0 +1,231 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +#define MAX_CASCADES 8 + +layout(push_constant, binding = 0, std430) uniform Params { +	mat4 projection; + +	uint band_power; +	uint sections_in_band; +	uint band_mask; +	float section_arc; + +	vec3 grid_size; +	uint cascade; + +	uint pad; +	float y_mult; +	uint probe_debug_index; +	int probe_axis_size; +} +params; + +// 
http://in4k.untergrund.net/html_articles/hugi_27_-_coding_corner_polaris_sphere_tessellation_101.htm + +vec3 get_sphere_vertex(uint p_vertex_id) { +	float x_angle = float(p_vertex_id & 1u) + (p_vertex_id >> params.band_power); + +	float y_angle = +			float((p_vertex_id & params.band_mask) >> 1) + ((p_vertex_id >> params.band_power) * params.sections_in_band); + +	x_angle *= params.section_arc * 0.5f; // remember - 180 degrees x rot, not 360 +	y_angle *= -params.section_arc; + +	vec3 point = vec3(sin(x_angle) * sin(y_angle), cos(x_angle), sin(x_angle) * cos(y_angle)); + +	return point; +} + +#ifdef MODE_PROBES + +layout(location = 0) out vec3 normal_interp; +layout(location = 1) out flat uint probe_index; + +#endif + +#ifdef MODE_VISIBILITY + +layout(location = 0) out float visibility; + +#endif + +struct CascadeData { +	vec3 offset; //offset of (0,0,0) in world coordinates +	float to_cell; // 1/bounds * grid_size +	ivec3 probe_world_offset; +	uint pad; +}; + +layout(set = 0, binding = 1, std140) uniform Cascades { +	CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(set = 0, binding = 4) uniform texture3D occlusion_texture; +layout(set = 0, binding = 3) uniform sampler linear_sampler; + +void main() { +#ifdef MODE_PROBES +	probe_index = gl_InstanceIndex; + +	normal_interp = get_sphere_vertex(gl_VertexIndex); + +	vec3 vertex = normal_interp * 0.2; + +	float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + +	ivec3 probe_cell; +	probe_cell.x = int(probe_index % params.probe_axis_size); +	probe_cell.y = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); +	probe_cell.z = int((probe_index / params.probe_axis_size) % params.probe_axis_size); + +	vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); + +	gl_Position = params.projection * vec4(vertex, 1.0); +#endif + +#ifdef MODE_VISIBILITY + +	int probe_index = 
int(params.probe_debug_index); + +	vec3 vertex = get_sphere_vertex(gl_VertexIndex) * 0.01; + +	float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + +	ivec3 probe_cell; +	probe_cell.x = int(probe_index % params.probe_axis_size); +	probe_cell.y = int((probe_index % (params.probe_axis_size * params.probe_axis_size)) / params.probe_axis_size); +	probe_cell.z = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); + +	vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); + +	int probe_voxels = int(params.grid_size.x) / int(params.probe_axis_size - 1); +	int occluder_index = int(gl_InstanceIndex); + +	int diameter = probe_voxels * 2; +	ivec3 occluder_pos; +	occluder_pos.x = int(occluder_index % diameter); +	occluder_pos.y = int(occluder_index / (diameter * diameter)); +	occluder_pos.z = int((occluder_index / diameter) % diameter); + +	float cell_size = 1.0 / cascades.data[params.cascade].to_cell; + +	ivec3 occluder_offset = occluder_pos - ivec3(diameter / 2); +	vertex += ((vec3(occluder_offset) + vec3(0.5)) * cell_size) / vec3(1.0, params.y_mult, 1.0); + +	ivec3 global_cell = probe_cell + cascades.data[params.cascade].probe_world_offset; +	uint occlusion_layer = 0; +	if ((global_cell.x & 1) != 0) { +		occlusion_layer |= 1; +	} +	if ((global_cell.y & 1) != 0) { +		occlusion_layer |= 2; +	} +	if ((global_cell.z & 1) != 0) { +		occlusion_layer |= 4; +	} +	ivec3 tex_pos = probe_cell * probe_voxels + occluder_offset; + +	const vec4 layer_axis[4] = vec4[]( +			vec4(1, 0, 0, 0), +			vec4(0, 1, 0, 0), +			vec4(0, 0, 1, 0), +			vec4(0, 0, 0, 1)); + +	tex_pos.z += int(params.cascade) * int(params.grid_size); +	if (occlusion_layer >= 4) { +		tex_pos.x += int(params.grid_size.x); +		occlusion_layer &= 3; +	} + +	visibility = dot(texelFetch(sampler3D(occlusion_texture, linear_sampler), tex_pos, 0), layer_axis[occlusion_layer]); + +	
gl_Position = params.projection * vec4(vertex, 1.0); + +#endif +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) out vec4 frag_color; + +layout(set = 0, binding = 2) uniform texture2DArray lightprobe_texture; +layout(set = 0, binding = 3) uniform sampler linear_sampler; + +layout(push_constant, binding = 0, std430) uniform Params { +	mat4 projection; + +	uint band_power; +	uint sections_in_band; +	uint band_mask; +	float section_arc; + +	vec3 grid_size; +	uint cascade; + +	uint pad; +	float y_mult; +	uint probe_debug_index; +	int probe_axis_size; +} +params; + +#ifdef MODE_PROBES + +layout(location = 0) in vec3 normal_interp; +layout(location = 1) in flat uint probe_index; + +#endif + +#ifdef MODE_VISIBILITY +layout(location = 0) in float visibility; +#endif + +vec2 octahedron_wrap(vec2 v) { +	vec2 signVal; +	signVal.x = v.x >= 0.0 ? 1.0 : -1.0; +	signVal.y = v.y >= 0.0 ? 1.0 : -1.0; +	return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { +	// https://twitter.com/Stubbesaurus/status/937994790553227264 +	n /= (abs(n.x) + abs(n.y) + abs(n.z)); +	n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); +	n.xy = n.xy * 0.5 + 0.5; +	return n.xy; +} + +void main() { +#ifdef MODE_PROBES + +	ivec3 tex_pos; +	tex_pos.x = int(probe_index) % params.probe_axis_size; //x +	tex_pos.y = int(probe_index) / (params.probe_axis_size * params.probe_axis_size); +	tex_pos.x += params.probe_axis_size * ((int(probe_index) / params.probe_axis_size) % params.probe_axis_size); //z +	tex_pos.z = int(params.cascade); + +	vec3 tex_pos_ofs = vec3(octahedron_encode(normal_interp) * float(OCT_SIZE), 0.0); +	vec3 tex_posf = vec3(vec2(tex_pos.xy * (OCT_SIZE + 2) + ivec2(1)), float(tex_pos.z)) + tex_pos_ofs; + +	tex_posf.xy /= vec2(ivec2(params.probe_axis_size * params.probe_axis_size * (OCT_SIZE + 2), params.probe_axis_size * (OCT_SIZE + 2))); + +	vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), tex_posf, 0.0); + +	frag_color = indirect_light; + +#endif + +#ifdef MODE_VISIBILITY + +	frag_color = vec4(vec3(1, visibility, visibility), 1.0); +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl new file mode 100644 index 0000000000..c4b29216d5 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl @@ -0,0 +1,472 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform sampler linear_sampler; + +layout(set = 0, binding = 3, std430) restrict readonly buffer DispatchData { +	uint x; +	uint y; +	uint z; +	uint total_count; +} +dispatch_data; + +struct ProcessVoxel { +	uint position; //xyz 7 bit packed, extra 11 bits for neighbours +	uint albedo; //rgb bits 0-14 albedo, bits 15-20 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbours +	uint light; //rgbe8985 encoded total saved 
light, extra 2 bits for neighbours +	uint light_aniso; //555555 (six 5-bit strengths) light anisotropy, extra 2 bits for neighbours +	//total neighbours: 26 +}; + +#ifdef MODE_PROCESS_STATIC +layout(set = 0, binding = 4, std430) restrict buffer ProcessVoxels { +#else +layout(set = 0, binding = 4, std430) restrict buffer readonly ProcessVoxels { +#endif +	ProcessVoxel data[]; +} +process_voxels; + +layout(r32ui, set = 0, binding = 5) uniform restrict uimage3D dst_light; +layout(rgba8, set = 0, binding = 6) uniform restrict image3D dst_aniso0; +layout(rg8, set = 0, binding = 7) uniform restrict image3D dst_aniso1; + +struct CascadeData { +	vec3 offset; //offset of (0,0,0) in world coordinates +	float to_cell; // 1/bounds * grid_size +	ivec3 probe_world_offset; +	uint pad; +}; + +layout(set = 0, binding = 8, std140) uniform Cascades { +	CascadeData data[MAX_CASCADES]; +} +cascades; + +#define LIGHT_TYPE_DIRECTIONAL 0 +#define LIGHT_TYPE_OMNI 1 +#define LIGHT_TYPE_SPOT 2 + +struct Light { +	vec3 color; +	float energy; + +	vec3 direction; +	bool has_shadow; + +	vec3 position; +	float attenuation; + +	uint type; +	float spot_angle; +	float spot_attenuation; +	float radius; + +	vec4 shadow_color; +}; + +layout(set = 0, binding = 9, std140) buffer restrict readonly Lights { +	Light data[]; +} +lights; + +layout(set = 0, binding = 10) uniform texture2DArray lightprobe_texture; + +layout(push_constant, binding = 0, std430) uniform Params { +	vec3 grid_size; +	uint max_cascades; + +	uint cascade; +	uint light_count; +	uint process_offset; +	uint process_increment; + +	int probe_axis_size; +	bool multibounce; +	float y_mult; +	uint pad; +} +params; + +vec2 octahedron_wrap(vec2 v) { +	vec2 signVal; +	signVal.x = v.x >= 0.0 ? 1.0 : -1.0; +	signVal.y = v.y >= 0.0 ? 1.0 : -1.0; +	return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { +	// https://twitter.com/Stubbesaurus/status/937994790553227264 +	n /= (abs(n.x) + abs(n.y) + abs(n.z)); +	n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); +	n.xy = n.xy * 0.5 + 0.5; +	return n.xy; +} + +void main() { +	uint voxel_index = uint(gl_GlobalInvocationID.x); + +	//used for skipping voxels every N frames +	voxel_index = params.process_offset + voxel_index * params.process_increment; + +	if (voxel_index >= dispatch_data.total_count) { +		return; +	} + +	uint voxel_position = process_voxels.data[voxel_index].position; + +	//keep for storing to texture +	ivec3 positioni = ivec3((uvec3(voxel_position, voxel_position, voxel_position) >> uvec3(0, 7, 14)) & uvec3(0x7F)); + +	vec3 position = vec3(positioni) + vec3(0.5); +	position /= cascades.data[params.cascade].to_cell; +	position += cascades.data[params.cascade].offset; + +	uint voxel_albedo = process_voxels.data[voxel_index].albedo; + +	vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F); +	vec3 light_accum[6]; + +	uint valid_aniso = (voxel_albedo >> 15) & 0x3F; + +	{ +		uint rgbe = process_voxels.data[voxel_index].light; + +		//read rgbe8985 +		float r = float((rgbe & 0xff) << 1); +		float g = float((rgbe >> 8) & 0x1ff); +		float b = float(((rgbe >> 17) & 0xff) << 1); +		float e = float((rgbe >> 25) & 0x1F); +		float m = pow(2.0, e - 15.0 - 9.0); + +		vec3 l = vec3(r, g, b) * m; + +		uint aniso = process_voxels.data[voxel_index].light_aniso; +		for (uint i = 0; i < 6; i++) { +			float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F); +			light_accum[i] = l * strength; +		} +	} + +	const vec3 aniso_dir[6] = vec3[]( +			vec3(1, 0, 0), +			vec3(0, 1, 0), +			vec3(0, 0, 1), +			vec3(-1, 0, 0), +			vec3(0, -1, 0), +			vec3(0, 0, -1)); + +	// Raytrace light + +	vec3 pos_to_uvw = 1.0 / params.grid_size; +	vec3 uvw_ofs = pos_to_uvw * 0.5; + +	for (uint i = 0; i < params.light_count; i++) { +		float attenuation = 1.0; +		vec3 direction; +		float light_distance = 1e20; + +		switch (lights.data[i].type) { +			case LIGHT_TYPE_DIRECTIONAL: { +				direction = -lights.data[i].direction; +	
		} break; +			case LIGHT_TYPE_OMNI: { +				vec3 rel_vec = lights.data[i].position - position; +				direction = normalize(rel_vec); +				light_distance = length(rel_vec); +				rel_vec.y /= params.y_mult; +				attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation); +			} break; +			case LIGHT_TYPE_SPOT: { +				vec3 rel_vec = lights.data[i].position - position; +				direction = normalize(rel_vec); +				light_distance = length(rel_vec); +				rel_vec.y /= params.y_mult; +				attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation); + +				float angle = acos(dot(normalize(rel_vec), -lights.data[i].direction)); +				if (angle > lights.data[i].spot_angle) { +					attenuation = 0.0; +				} else { +					float d = clamp(angle / lights.data[i].spot_angle, 0, 1); +					attenuation *= pow(1.0 - d, lights.data[i].spot_attenuation); +				} +			} break; +		} + +		if (attenuation < 0.001) { +			continue; +		} + +		bool hit = false; + +		vec3 ray_pos = position; +		vec3 ray_dir = direction; +		vec3 inv_dir = 1.0 / ray_dir; + +		//this is how to properly bias outgoing rays +		float cell_size = 1.0 / cascades.data[params.cascade].to_cell; +		ray_pos += sign(direction) * cell_size * 0.48; // go almost to the box edge but remain inside +		ray_pos += ray_dir * 0.4 * cell_size; //apply a small bias from there + +		for (uint j = params.cascade; j < params.max_cascades; j++) { +			//convert to local bounds +			vec3 pos = ray_pos - cascades.data[j].offset; +			pos *= cascades.data[j].to_cell; +			float local_distance = light_distance * cascades.data[j].to_cell; + +			if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { +				continue; //already past bounds for this cascade, goto next +			} + +			//find maximum advance distance (until reaching bounds) +			vec3 t0 = -pos * inv_dir; +			vec3 t1 = (params.grid_size - pos) * inv_dir; +			vec3 tmax = max(t0, 
t1); +			float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + +			max_advance = min(local_distance, max_advance); + +			float advance = 0.0; +			float occlusion = 1.0; + +			while (advance < max_advance) { +				//read how much to advance from SDF +				vec3 uvw = (pos + ray_dir * advance) * pos_to_uvw; + +				float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; +				if (distance < 0.001) { +					//consider hit +					hit = true; +					break; +				} + +				occlusion = min(occlusion, distance); + +				advance += distance; +			} + +			if (hit) { +				attenuation *= occlusion; +				break; +			} + +			if (advance >= local_distance) { +				break; //past light distance, abandon search +			} +			//change ray origin to collision with bounds +			pos += ray_dir * max_advance; +			pos /= cascades.data[j].to_cell; +			pos += cascades.data[j].offset; +			light_distance -= max_advance / cascades.data[j].to_cell; +			ray_pos = pos; +		} + +		if (!hit) { +			vec3 light = albedo * lights.data[i].color.rgb * lights.data[i].energy * attenuation; + +			for (int j = 0; j < 6; j++) { +				if (bool(valid_aniso & (1 << j))) { +					light_accum[j] += max(0.0, dot(aniso_dir[j], direction)) * light; +				} +			} +		} +	} + +	// Add indirect light + +	if (params.multibounce) { +		vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; +		ivec3 probe_base_pos = ivec3(pos); + +		vec4 probe_accum[6] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); +		float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); + +		ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); +		tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); + +		tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); + +		vec3 base_tex_posf = vec3(tex_pos); +		vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); +		vec3 
probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; + +		for (uint j = 0; j < 8; j++) { +			ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); +			ivec3 probe_posi = probe_base_pos; +			probe_posi += offset; + +			// Compute weight + +			vec3 probe_pos = vec3(probe_posi); +			vec3 probe_to_pos = pos - probe_pos; +			vec3 probe_dir = normalize(-probe_to_pos); + +			// Compute lightprobe texture position + +			vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + +			for (uint k = 0; k < 6; k++) { +				if (bool(valid_aniso & (1 << k))) { +					vec3 n = aniso_dir[k]; +					float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); + +					vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); +					tex_posf.xy *= tex_pixel_size; + +					vec3 pos_uvw = tex_posf; +					pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; +					pos_uvw.x += float(offset.z) * probe_uv_offset.z; +					vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0); + +					probe_accum[k] += indirect_light * weight; +					weight_accum[k] += weight; +				} +			} +		} + +		for (uint k = 0; k < 6; k++) { +			if (weight_accum[k] > 0.0) { +				light_accum[k] += probe_accum[k].rgb * albedo / weight_accum[k]; +			} +		} +	} + +	// Store the light in the light texture + +	float lumas[6]; +	vec3 light_total = vec3(0); + +	for (int i = 0; i < 6; i++) { +		light_total += light_accum[i]; +		lumas[i] = max(light_accum[i].r, max(light_accum[i].g, light_accum[i].b)); +	} + +	float luma_total = max(light_total.r, max(light_total.g, light_total.b)); + +	uint light_total_rgbe; + +	{ +		//compress to RGBE9995 to save space + +		const float pow2to9 = 512.0f; +		const float B = 15.0f; +		const float N = 9.0f; +		const float LN2 = 0.6931471805599453094172321215; + +		float cRed = clamp(light_total.r, 0.0, 65408.0); +		float cGreen = 
clamp(light_total.g, 0.0, 65408.0); +		float cBlue = clamp(light_total.b, 0.0, 65408.0); + +		float cMax = max(cRed, max(cGreen, cBlue)); + +		float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + +		float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + +		float exps = expp + 1.0f; + +		if (0.0 <= sMax && sMax < pow2to9) { +			exps = expp; +		} + +		float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); +		float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); +		float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); +#ifdef MODE_PROCESS_STATIC +		//since its self-save, use RGBE8985 +		light_total_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); + +#else +		light_total_rgbe = (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); +#endif +	} + +#ifdef MODE_PROCESS_DYNAMIC + +	vec4 aniso0; +	aniso0.r = lumas[0] / luma_total; +	aniso0.g = lumas[1] / luma_total; +	aniso0.b = lumas[2] / luma_total; +	aniso0.a = lumas[3] / luma_total; + +	vec2 aniso1; +	aniso1.r = lumas[4] / luma_total; +	aniso1.g = lumas[5] / luma_total; + +	//save to 3D textures +	imageStore(dst_aniso0, positioni, aniso0); +	imageStore(dst_aniso1, positioni, vec4(aniso1, 0.0, 0.0)); +	imageStore(dst_light, positioni, uvec4(light_total_rgbe)); + +	//also fill neighbours, so light interpolation during the indirect pass works + +	//recover the neighbour list from the leftover bits +	uint neighbours = (voxel_albedo >> 21) | ((voxel_position >> 21) << 11) | ((process_voxels.data[voxel_index].light >> 30) << 22) | ((process_voxels.data[voxel_index].light_aniso >> 30) << 24); + +	const uint max_neighbours = 26; +	const ivec3 neighbour_positions[max_neighbours] = ivec3[]( +			ivec3(-1, -1, -1), +			ivec3(-1, -1, 0), +			ivec3(-1, -1, 1), +			ivec3(-1, 0, -1), +			ivec3(-1, 0, 0), +			ivec3(-1, 0, 1), +			ivec3(-1, 1, 
-1), +			ivec3(-1, 1, 0), +			ivec3(-1, 1, 1), +			ivec3(0, -1, -1), +			ivec3(0, -1, 0), +			ivec3(0, -1, 1), +			ivec3(0, 0, -1), +			ivec3(0, 0, 1), +			ivec3(0, 1, -1), +			ivec3(0, 1, 0), +			ivec3(0, 1, 1), +			ivec3(1, -1, -1), +			ivec3(1, -1, 0), +			ivec3(1, -1, 1), +			ivec3(1, 0, -1), +			ivec3(1, 0, 0), +			ivec3(1, 0, 1), +			ivec3(1, 1, -1), +			ivec3(1, 1, 0), +			ivec3(1, 1, 1)); + +	for (uint i = 0; i < max_neighbours; i++) { +		if (bool(neighbours & (1 << i))) { +			ivec3 neighbour_pos = positioni + neighbour_positions[i]; +			imageStore(dst_light, neighbour_pos, uvec4(light_total_rgbe)); +			imageStore(dst_aniso0, neighbour_pos, aniso0); +			imageStore(dst_aniso1, neighbour_pos, vec4(aniso1, 0.0, 0.0)); +		} +	} + +#endif + +#ifdef MODE_PROCESS_STATIC + +	//save back the anisotropic + +	uint light = process_voxels.data[voxel_index].light & (3 << 30); +	light |= light_total_rgbe; +	process_voxels.data[voxel_index].light = light; //replace + +	uint light_aniso = process_voxels.data[voxel_index].light_aniso & (3 << 30); +	for (int i = 0; i < 6; i++) { +		light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); +	} + +	process_voxels.data[voxel_index].light_aniso = light_aniso; + +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_fields.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_fields.glsl new file mode 100644 index 0000000000..eec0a90c0d --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_fields.glsl @@ -0,0 +1,182 @@ +/* clang-format off */ +[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = OCT_RES, local_size_y = OCT_RES, local_size_z = 1) in; + +/* clang-format on */ + +#define MAX_CASCADES 8 + +layout(rgba16f, set = 0, binding = 1) uniform restrict image2DArray irradiance_texture; +layout(rg16f, set = 0, binding = 2) uniform restrict image2DArray depth_texture; + +layout(rgba32ui, set = 0, binding = 3) uniform restrict uimage2DArray irradiance_history_texture; 
+layout(rg32ui, set = 0, binding = 4) uniform restrict uimage2DArray depth_history_texture; + +struct CascadeData { +	vec3 offset; //offset of (0,0,0) in world coordinates +	float to_cell; // 1/bounds * grid_size +}; + +layout(set = 0, binding = 5, std140) uniform Cascades { +	CascadeData data[MAX_CASCADES]; +} +cascades; + +#define DEPTH_HISTORY_BITS 24 +#define IRRADIANCE_HISTORY_BITS 16 + +layout(push_constant, binding = 0, std430) uniform Params { +	vec3 grid_size; +	uint max_cascades; + +	uint probe_axis_size; +	uint cascade; +	uint history_size; +	uint pad0; + +	ivec3 scroll; //scroll in probes +	uint pad1; +} +params; + +void main() { +	ivec2 local = ivec2(gl_LocalInvocationID.xy); +	ivec2 probe = ivec2(gl_WorkGroupID.xy); + +	ivec3 probe_cell; +	probe_cell.x = probe.x % int(params.probe_axis_size); +	probe_cell.y = probe.y; +	probe_cell.z = probe.x / int(params.probe_axis_size); + +#ifdef MODE_SCROLL_BEGIN + +	ivec3 read_cell = probe_cell - params.scroll; + +	uint src_layer = (params.history_size + 1) * params.cascade; +	uint dst_layer = (params.history_size + 1) * params.max_cascades; + +	for (uint i = 0; i <= params.history_size; i++) { +		ivec3 write_pos = ivec3(probe * OCT_RES + local, int(i)); + +		if (any(lessThan(read_pos, ivec3(0))) || any(greaterThanEqual(read_pos, ivec3(params.probe_axis_size)))) { +			// nowhere to read from for scrolling, try finding the value from upper probes + +#ifdef MODE_IRRADIANCE +			imageStore(irradiance_history_texture, write_pos, uvec4(0)); +#endif +#ifdef MODE_DEPTH +			imageStore(depth_history_texture, write_pos, uvec4(0)); +#endif +		} else { +			ivec3 read_pos; +			read_pos.xy = read_cell.xy; +			read_pos.x += read_cell.z * params.probe_axis_size; +			read_pos.xy = read_pos.xy * OCT_RES + local; +			read_pos.z = int(i); + +#ifdef MODE_IRRADIANCE +			uvec4 value = imageLoad(irradiance_history_texture, read_pos); +			imageStore(irradiance_history_texture, write_pos, value); +#endif +#ifdef MODE_DEPTH +			uvec2 value 
= imageLoad(depth_history_texture, read_pos); +			imageStore(depth_history_texture, write_pos, value); +#endif +		} +	} + +#endif // MODE_SCROLL_BEGIN + +#ifdef MODE_SCROLL_END + +	uint src_layer = (params.history_size + 1) * params.max_cascades; +	uint dst_layer = (params.history_size + 1) * params.cascade; + +	for (uint i = 0; i <= params.history_size; i++) { +		ivec3 pos = ivec3(probe * OCT_RES + local, int(i)); + +#ifdef MODE_IRRADIANCE +		uvec4 value = imageLoad(irradiance_history_texture, read_pos); +		imageStore(irradiance_history_texture, write_pos, value); +#endif +#ifdef MODE_DEPTH +		uvec2 value = imageLoad(depth_history_texture, read_pos); +		imageStore(depth_history_texture, write_pos, value); +#endif +	} + +#endif //MODE_SCROLL_END + +#ifdef MODE_STORE + +	uint src_layer = (params.history_size + 1) * params.cascade + params.history_size; +	ivec3 read_pos = ivec3(probe * OCT_RES + local, int(src_layer)); + +	ivec3 write_pos = ivec3(probe * (OCT_RES + 2) + ivec2(1), int(params.cascade)); + +	ivec3 copy_to[4] = ivec3[](write_pos, ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2)); + +#ifdef MODE_IRRADIANCE +	uvec4 average = imageLoad(irradiance_history_texture, read_pos); +	vec4 light_accum = vec4(average / params.history_size) / float(1 << IRRADIANCE_HISTORY_BITS); + +#endif +#ifdef MODE_DEPTH +	uvec2 value = imageLoad(depth_history_texture, read_pos); +	vec2 depth_accum = vec4(average / params.history_size) / float(1 << IRRADIANCE_HISTORY_BITS); + +	float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; +	float max_depth = length(params.grid_size / cascades.data[params.max_cascades - 1].to_cell); +	max_depth /= probe_cell_size; + +	depth_value = (vec2(average / params.history_size) / float(1 << DEPTH_HISTORY_BITS)) * vec2(max_depth, max_depth * max_depth); + +#endif + +	/* Fill the border if required */ + +	if (local == ivec2(0, 0)) { +		copy_to[1] = texture_pos + ivec3(OCT_RES 
- 1, -1, 0); +		copy_to[2] = texture_pos + ivec3(-1, OCT_RES - 1, 0); +		copy_to[3] = texture_pos + ivec3(OCT_RES, OCT_RES, 0); +	} else if (local == ivec2(OCT_RES - 1, 0)) { +		copy_to[1] = texture_pos + ivec3(0, -1, 0); +		copy_to[2] = texture_pos + ivec3(OCT_RES, OCT_RES - 1, 0); +		copy_to[3] = texture_pos + ivec3(-1, OCT_RES, 0); +	} else if (local == ivec2(0, OCT_RES - 1)) { +		copy_to[1] = texture_pos + ivec3(-1, 0, 0); +		copy_to[2] = texture_pos + ivec3(OCT_RES - 1, OCT_RES, 0); +		copy_to[3] = texture_pos + ivec3(OCT_RES, -1, 0); +	} else if (local == ivec2(OCT_RES - 1, OCT_RES - 1)) { +		copy_to[1] = texture_pos + ivec3(0, OCT_RES, 0); +		copy_to[2] = texture_pos + ivec3(OCT_RES, 0, 0); +		copy_to[3] = texture_pos + ivec3(-1, -1, 0); +	} else if (local.y == 0) { +		copy_to[1] = texture_pos + ivec3(OCT_RES - local.x - 1, local.y - 1, 0); +	} else if (local.x == 0) { +		copy_to[1] = texture_pos + ivec3(local.x - 1, OCT_RES - local.y - 1, 0); +	} else if (local.y == OCT_RES - 1) { +		copy_to[1] = texture_pos + ivec3(OCT_RES - local.x - 1, local.y + 1, 0); +	} else if (local.x == OCT_RES - 1) { +		copy_to[1] = texture_pos + ivec3(local.x + 1, OCT_RES - local.y - 1, 0); +	} + +	for (int i = 0; i < 4; i++) { +		if (copy_to[i] == ivec3(-2, -2, -2)) { +			continue; +		} +#ifdef MODE_IRRADIANCE +		imageStore(irradiance_texture, copy_to[i], light_accum); +#endif +#ifdef MODE_DEPTH +		imageStore(depth_texture, copy_to[i], vec4(depth_value, 0.0, 0.0)); +#endif +	} + +#endif // MODE_STORE +} diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl new file mode 100644 index 0000000000..e4779aafaf --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl @@ -0,0 +1,605 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D 
sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; + +layout(set = 0, binding = 6) uniform sampler linear_sampler; + +struct CascadeData { +	vec3 offset; //offset of (0,0,0) in world coordinates +	float to_cell; // 1/bounds * grid_size +	ivec3 probe_world_offset; +	uint pad; +}; + +layout(set = 0, binding = 7, std140) uniform Cascades { +	CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(r32ui, set = 0, binding = 8) uniform restrict uimage2DArray lightprobe_texture_data; +layout(rgba16i, set = 0, binding = 9) uniform restrict iimage2DArray lightprobe_history_texture; +layout(rgba32i, set = 0, binding = 10) uniform restrict iimage2D lightprobe_average_texture; + +//used for scrolling +layout(rgba16i, set = 0, binding = 11) uniform restrict iimage2DArray lightprobe_history_scroll_texture; +layout(rgba32i, set = 0, binding = 12) uniform restrict iimage2D lightprobe_average_scroll_texture; + +layout(rgba32i, set = 0, binding = 13) uniform restrict iimage2D lightprobe_average_parent_texture; + +layout(set = 1, binding = 0) uniform textureCube sky_irradiance; + +layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; + +#define HISTORY_BITS 10 + +#define SKY_MODE_DISABLED 0 +#define SKY_MODE_COLOR 1 +#define SKY_MODE_SKY 2 + +layout(push_constant, binding = 0, std430) uniform Params { +	vec3 grid_size; +	uint max_cascades; + +	uint probe_axis_size; +	uint cascade; +	uint history_index; +	uint history_size; + +	uint ray_count; +	float ray_bias; +	ivec2 image_size; + +	ivec3 world_offset; +	uint sky_mode; + +	ivec3 scroll; +	float sky_energy; + +	vec3 sky_color; +	float y_mult; +} +params; + +const float PI = 3.14159265f; +const float GOLDEN_ANGLE = PI * (3.0 - sqrt(5.0)); + +vec3 vogel_hemisphere(uint p_index, uint p_count, float p_offset) { +	
float r = sqrt(float(p_index) + 0.5f) / sqrt(float(p_count)); +	float theta = float(p_index) * GOLDEN_ANGLE + p_offset; +	float y = cos(r * PI * 0.5); +	float l = sin(r * PI * 0.5); +	return vec3(l * cos(theta), l * sin(theta), y * (float(p_index & 1) * 2.0 - 1.0)); +} + +uvec3 hash3(uvec3 x) { +	x = ((x >> 16) ^ x) * 0x45d9f3b; +	x = ((x >> 16) ^ x) * 0x45d9f3b; +	x = (x >> 16) ^ x; +	return x; +} + +float hashf3(vec3 co) { +	return fract(sin(dot(co, vec3(12.9898, 78.233, 137.13451))) * 43758.5453); +} + +vec3 octahedron_encode(vec2 f) { +	// https://twitter.com/Stubbesaurus/status/937994790553227264 +	f = f * 2.0 - 1.0; +	vec3 n = vec3(f.x, f.y, 1.0f - abs(f.x) - abs(f.y)); +	float t = clamp(-n.z, 0.0, 1.0); +	n.x += n.x >= 0 ? -t : t; +	n.y += n.y >= 0 ? -t : t; +	return normalize(n); +} + +uint rgbe_encode(vec3 color) { +	const float pow2to9 = 512.0f; +	const float B = 15.0f; +	const float N = 9.0f; +	const float LN2 = 0.6931471805599453094172321215; + +	float cRed = clamp(color.r, 0.0, 65408.0); +	float cGreen = clamp(color.g, 0.0, 65408.0); +	float cBlue = clamp(color.b, 0.0, 65408.0); + +	float cMax = max(cRed, max(cGreen, cBlue)); + +	float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + +	float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + +	float exps = expp + 1.0f; + +	if (0.0 <= sMax && sMax < pow2to9) { +		exps = expp; +	} + +	float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); +	float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); +	float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); +	return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); +} + +void main() { +	ivec2 pos = ivec2(gl_GlobalInvocationID.xy); +	if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing +		return; +	} + +#ifdef MODE_PROCESS + +	float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / 
cascades.data[params.cascade].to_cell; + +	ivec3 probe_cell; +	probe_cell.x = pos.x % int(params.probe_axis_size); +	probe_cell.y = pos.y; +	probe_cell.z = pos.x / int(params.probe_axis_size); + +	vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; +	vec3 pos_to_uvw = 1.0 / params.grid_size; + +	vec4 probe_sh_accum[SH_SIZE] = vec4[]( +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0) +#if (SH_SIZE == 16) +					, +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0), +			vec4(0.0) +#endif +	); + +	// quickly ensure each probe has a different "offset" for the vogel function, based on integer world position +	uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell)); +	float offset = hashf3(vec3(h3 & uvec3(0xFFFFF))); + +	//for a more homogeneous hemisphere, alternate based on history frames +	uint ray_offset = params.history_index; +	uint ray_mult = params.history_size; +	uint ray_total = ray_mult * params.ray_count; + +	for (uint i = 0; i < params.ray_count; i++) { +		vec3 ray_dir = vogel_hemisphere(ray_offset + i * ray_mult, ray_total, offset); +		ray_dir.y *= params.y_mult; +		ray_dir = normalize(ray_dir); + +		//needs to be visible +		vec3 ray_pos = probe_pos; +		vec3 inv_dir = 1.0 / ray_dir; + +		bool hit = false; +		vec3 hit_normal; +		vec3 hit_light; +		vec3 hit_aniso0; +		vec3 hit_aniso1; + +		float bias = params.ray_bias; +		vec3 abs_ray_dir = abs(ray_dir); +		ray_pos += ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) * bias / cascades.data[params.cascade].to_cell; + +		for (uint j = params.cascade; j < params.max_cascades; j++) { +			//convert to local bounds +			vec3 pos = ray_pos - cascades.data[j].offset; +			pos *= cascades.data[j].to_cell; + +			if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { +				continue; //already past bounds for this 
cascade, goto next +			} + +			//find maximum advance distance (until reaching bounds) +			vec3 t0 = -pos * inv_dir; +			vec3 t1 = (params.grid_size - pos) * inv_dir; +			vec3 tmax = max(t0, t1); +			float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + +			float advance = 0.0; + +			vec3 uvw; + +			while (advance < max_advance) { +				//read how much to advance from SDF +				uvw = (pos + ray_dir * advance) * pos_to_uvw; + +				float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; +				if (distance < 0.001) { +					//consider hit +					hit = true; +					break; +				} + +				advance += distance; +			} + +			if (hit) { +				const float EPSILON = 0.001; +				hit_normal = normalize(vec3( +						texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, +						texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, +						texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + +				hit_light = texture(sampler3D(light_cascades[j], linear_sampler), uvw).rgb; +				vec4 aniso0 = texture(sampler3D(aniso0_cascades[j], linear_sampler), uvw); +				hit_aniso0 = aniso0.rgb; +				hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[j], linear_sampler), uvw).rg); + +				break; +			} + +			//change ray origin to collision with bounds +			pos += ray_dir * max_advance; +			pos /= cascades.data[j].to_cell; +			pos += cascades.data[j].offset; +			ray_pos = pos; +		} + +		vec4 light; +		if (hit) { +			//one liner magic +			light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); +			light.a = 1.0; +		} else 
if (params.sky_mode == SKY_MODE_SKY) { +			light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), ray_dir, 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates +			light.rgb *= params.sky_energy; +			light.a = 0.0; + +		} else if (params.sky_mode == SKY_MODE_COLOR) { +			light.rgb = params.sky_color; +			light.rgb *= params.sky_energy; +			light.a = 0.0; +		} else { +			light = vec4(0, 0, 0, 0); +		} + +		vec3 ray_dir2 = ray_dir * ray_dir; +		float c[SH_SIZE] = float[]( + +				0.282095, //l0 +				0.488603 * ray_dir.y, //l1n1 +				0.488603 * ray_dir.z, //l1n0 +				0.488603 * ray_dir.x, //l1p1 +				1.092548 * ray_dir.x * ray_dir.y, //l2n2 +				1.092548 * ray_dir.y * ray_dir.z, //l2n1 +				0.315392 * (3.0 * ray_dir2.z - 1.0), //l20 +				1.092548 * ray_dir.x * ray_dir.z, //l2p1 +				0.546274 * (ray_dir2.x - ray_dir2.y) //l2p2 +#if (SH_SIZE == 16) +				, +				0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y), +				2.890611 * ray_dir.y * ray_dir.x * ray_dir.z, +				0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z), +				0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z), +				0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z), +				1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z, +				0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y) + +#endif +		); + +		for (uint j = 0; j < SH_SIZE; j++) { +			probe_sh_accum[j] += light * c[j]; +		} +	} + +	for (uint i = 0; i < SH_SIZE; i++) { +		// store in history texture +		ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index)); +		ivec2 average_pos = prev_pos.xy; + +		vec4 value = probe_sh_accum[i] * 4.0 / float(params.ray_count); + +		ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average + +		ivec4 prev_value = imageLoad(lightprobe_history_texture, prev_pos); +		ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + +		average 
-= prev_value; +		average += ivalue; + +		imageStore(lightprobe_history_texture, prev_pos, ivalue); +		imageStore(lightprobe_average_texture, average_pos, average); +	} +#endif // MODE PROCESS + +#ifdef MODE_STORE + +	// converting to octahedral in this step is required because +	// octahedral is much faster to read from the screen than spherical harmonics, +	// despite the very slight quality loss + +	ivec2 sh_pos = (pos / OCT_SIZE) * ivec2(1, SH_SIZE); +	ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1); +	ivec2 local_pos = pos % OCT_SIZE; + +	//fill the spherical harmonic +	vec4 sh[SH_SIZE]; + +	for (uint i = 0; i < SH_SIZE; i++) { +		// read the accumulated average for this SH coefficient and normalize it (history size, then fixed-point scale) +		ivec2 average_pos = sh_pos + ivec2(0, i); +		ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + +		sh[i] = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); +	} + +	//compute the octahedral normal for this texel +	vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE)); +	/* +	// read the spherical harmonic +	const float c1 = 0.429043; +	const float c2 = 0.511664; +	const float c3 = 0.743125; +	const float c4 = 0.886227; +	const float c5 = 0.247708; +	vec4 light = (c1 * sh[8] * (normal.x * normal.x - normal.y * normal.y) + +					  c3 * sh[6] * normal.z * normal.z + +					  c4 * sh[0] - +					  c5 * sh[6] + +					  2.0 * c1 * sh[4] * normal.x * normal.y + +					  2.0 * c1 * sh[7] * normal.x * normal.z + +					  2.0 * c1 * sh[5] * normal.y * normal.z + +					  2.0 * c2 * sh[3] * normal.x + +					  2.0 * c2 * sh[1] * normal.y + +					  2.0 * c2 * sh[2] * normal.z); +*/ +	vec3 normal2 = normal * normal; +	float c[SH_SIZE] = float[]( + +			0.282095, //l0 +			0.488603 * normal.y, //l1n1 +			0.488603 * normal.z, //l1n0 +			0.488603 * normal.x, //l1p1 +			1.092548 * normal.x * normal.y, //l2n2 +			1.092548 * normal.y * normal.z, //l2n1 +			0.315392 * (3.0 * normal2.z - 1.0), //l20 +			1.092548 * normal.x * normal.z, //l2p1 +			0.546274 * 
(normal2.x - normal2.y) //l2p2 +#if (SH_SIZE == 16) +			, +			0.590043 * normal.y * (3.0f * normal2.x - normal2.y), +			2.890611 * normal.y * normal.x * normal.z, +			0.646360 * normal.y * (-1.0f + 5.0f * normal2.z), +			0.373176 * (5.0f * normal2.z * normal.z - 3.0f * normal.z), +			0.457045 * normal.x * (-1.0f + 5.0f * normal2.z), +			1.445305 * (normal2.x - normal2.y) * normal.z, +			0.590043 * normal.x * (normal2.x - 3.0f * normal2.y) + +#endif +	); + +	const float l_mult[SH_SIZE] = float[]( +			1.0, +			2.0 / 3.0, +			2.0 / 3.0, +			2.0 / 3.0, +			1.0 / 4.0, +			1.0 / 4.0, +			1.0 / 4.0, +			1.0 / 4.0, +			1.0 / 4.0 +#if (SH_SIZE == 16) +			, // l4 does not contribute to irradiance +			0.0, +			0.0, +			0.0, +			0.0, +			0.0, +			0.0, +			0.0 +#endif +	); + +	vec3 irradiance = vec3(0.0); +	vec3 radiance = vec3(0.0); + +	for (uint i = 0; i < SH_SIZE; i++) { +		vec3 m = sh[i].rgb * c[i] * 4.0; +		irradiance += m * l_mult[i]; +		radiance += m; +	} + +	//encode RGBE9995 for the final texture + +	uint irradiance_rgbe = rgbe_encode(irradiance); +	uint radiance_rgbe = rgbe_encode(radiance); + +	//store in octahedral map + +	ivec3 texture_pos = ivec3(oct_pos, int(params.cascade)); +	ivec3 copy_to[4] = ivec3[](ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2)); +	copy_to[0] = texture_pos + ivec3(local_pos, 0); + +	if (local_pos == ivec2(0, 0)) { +		copy_to[1] = texture_pos + ivec3(OCT_SIZE - 1, -1, 0); +		copy_to[2] = texture_pos + ivec3(-1, OCT_SIZE - 1, 0); +		copy_to[3] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE, 0); +	} else if (local_pos == ivec2(OCT_SIZE - 1, 0)) { +		copy_to[1] = texture_pos + ivec3(0, -1, 0); +		copy_to[2] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE - 1, 0); +		copy_to[3] = texture_pos + ivec3(-1, OCT_SIZE, 0); +	} else if (local_pos == ivec2(0, OCT_SIZE - 1)) { +		copy_to[1] = texture_pos + ivec3(-1, 0, 0); +		copy_to[2] = texture_pos + ivec3(OCT_SIZE - 1, OCT_SIZE, 0); +		copy_to[3] = texture_pos + ivec3(OCT_SIZE, -1, 0); 
+	} else if (local_pos == ivec2(OCT_SIZE - 1, OCT_SIZE - 1)) { +		copy_to[1] = texture_pos + ivec3(0, OCT_SIZE, 0); +		copy_to[2] = texture_pos + ivec3(OCT_SIZE, 0, 0); +		copy_to[3] = texture_pos + ivec3(-1, -1, 0); +	} else if (local_pos.y == 0) { +		copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, local_pos.y - 1, 0); +	} else if (local_pos.x == 0) { +		copy_to[1] = texture_pos + ivec3(local_pos.x - 1, OCT_SIZE - local_pos.y - 1, 0); +	} else if (local_pos.y == OCT_SIZE - 1) { +		copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, local_pos.y + 1, 0); +	} else if (local_pos.x == OCT_SIZE - 1) { +		copy_to[1] = texture_pos + ivec3(local_pos.x + 1, OCT_SIZE - local_pos.y - 1, 0); +	} + +	for (int i = 0; i < 4; i++) { +		if (copy_to[i] == ivec3(-2, -2, -2)) { +			continue; +		} +		imageStore(lightprobe_texture_data, copy_to[i], uvec4(irradiance_rgbe)); +		imageStore(lightprobe_texture_data, copy_to[i] + ivec3(0, 0, int(params.max_cascades)), uvec4(radiance_rgbe)); +	} + +#endif + +#ifdef MODE_SCROLL + +	ivec3 probe_cell; +	probe_cell.x = pos.x % int(params.probe_axis_size); +	probe_cell.y = pos.y; +	probe_cell.z = pos.x / int(params.probe_axis_size); + +	ivec3 read_probe = probe_cell - params.scroll; + +	if (all(greaterThanEqual(read_probe, ivec3(0))) && all(lessThan(read_probe, ivec3(params.probe_axis_size)))) { +		// can scroll +		ivec2 tex_pos; +		tex_pos = read_probe.xy; +		tex_pos.x += read_probe.z * int(params.probe_axis_size); + +		//scroll +		for (uint j = 0; j < params.history_size; j++) { +			for (int i = 0; i < SH_SIZE; i++) { +				// copy from history texture +				ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); +				ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); +				ivec4 value = imageLoad(lightprobe_history_texture, src_pos); +				imageStore(lightprobe_history_scroll_texture, dst_pos, value); +			} +		} + +		for (int i = 0; i < SH_SIZE; i++) { +			// copy from average texture +			ivec2 src_pos = 
ivec2(tex_pos.x, tex_pos.y * SH_SIZE + i); +			ivec2 dst_pos = ivec2(pos.x, pos.y * SH_SIZE + i); +			ivec4 value = imageLoad(lightprobe_average_texture, src_pos); +			imageStore(lightprobe_average_scroll_texture, dst_pos, value); +		} +	} else if (params.cascade < params.max_cascades - 1) { +		//cant scroll, must look for position in parent cascade + +		//to global coords +		float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; +		vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; + +		//to parent local coords +		probe_pos -= cascades.data[params.cascade + 1].offset; +		probe_pos *= cascades.data[params.cascade + 1].to_cell; +		probe_pos = probe_pos * float(params.probe_axis_size - 1) / float(params.grid_size.x); + +		ivec3 probe_posi = ivec3(probe_pos); +		//add up all light, no need to use occlusion here, since occlusion will do its work afterwards + +		vec4 average_light[SH_SIZE] = vec4[](vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) +#if (SH_SIZE == 16) +																															 , +				vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) +#endif +		); +		float total_weight = 0.0; + +		for (int i = 0; i < 8; i++) { +			ivec3 offset = probe_posi + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + +			vec3 trilinear = vec3(1.0) - abs(probe_pos - vec3(offset)); +			float weight = trilinear.x * trilinear.y * trilinear.z; + +			ivec2 tex_pos; +			tex_pos = offset.xy; +			tex_pos.x += offset.z * int(params.probe_axis_size); + +			for (int j = 0; j < SH_SIZE; j++) { +				// copy from history texture +				ivec2 src_pos = ivec2(tex_pos.x, tex_pos.y * SH_SIZE + j); +				ivec4 average = imageLoad(lightprobe_average_parent_texture, src_pos); +				vec4 value = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); +				average_light[j] += value * weight; +			} + +			total_weight += weight; +		} 
+ +		if (total_weight > 0.0) { +			total_weight = 1.0 / total_weight; +		} +		//store the averaged values everywhere + +		for (int i = 0; i < SH_SIZE; i++) { +			ivec4 ivalue = clamp(ivec4(average_light[i] * total_weight * float(1 << HISTORY_BITS)), ivec4(-32768), ivec4(32767)); //clamp to 16 bits, so higher values don't break average +			// copy from history texture +			ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, 0); +			for (uint j = 0; j < params.history_size; j++) { +				dst_pos.z = int(j); +				imageStore(lightprobe_history_scroll_texture, dst_pos, ivalue); +			} + +			ivalue *= int(params.history_size); //average needs to have all history added up +			imageStore(lightprobe_average_scroll_texture, dst_pos.xy, ivalue); +		} + +	} else { +		// clear and let it re-raytrace, only for the last cascade, which happens very un-often +		//scroll +		for (uint j = 0; j < params.history_size; j++) { +			for (int i = 0; i < SH_SIZE; i++) { +				// copy from history texture +				ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); +				imageStore(lightprobe_history_scroll_texture, dst_pos, ivec4(0)); +			} +		} + +		for (int i = 0; i < SH_SIZE; i++) { +			// copy from average texture +			ivec2 dst_pos = ivec2(pos.x, pos.y * SH_SIZE + i); +			imageStore(lightprobe_average_scroll_texture, dst_pos, ivec4(0)); +		} +	} + +#endif + +#ifdef MODE_SCROLL_STORE + +	//do not update probe texture, as these will be updated later + +	for (uint j = 0; j < params.history_size; j++) { +		for (int i = 0; i < SH_SIZE; i++) { +			// copy from history texture +			ivec3 spos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); +			ivec4 value = imageLoad(lightprobe_history_scroll_texture, spos); +			imageStore(lightprobe_history_texture, spos, value); +		} +	} + +	for (int i = 0; i < SH_SIZE; i++) { +		// copy from average texture +		ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); +		ivec4 average = imageLoad(lightprobe_average_scroll_texture, spos); +		
imageStore(lightprobe_average_texture, spos, average); +	} + +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl new file mode 100644 index 0000000000..d7d19897e3 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl @@ -0,0 +1,1056 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#ifdef MODE_JUMPFLOOD_OPTIMIZED +#define GROUP_SIZE 8 + +layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = GROUP_SIZE) in; + +#elif defined(MODE_OCCLUSION) || defined(MODE_SCROLL) +//buffer layout +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#else +//grid layout +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#endif + +#if defined(MODE_INITIALIZE_JUMP_FLOOD) || defined(MODE_INITIALIZE_JUMP_FLOOD_HALF) +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; +#endif + +#ifdef MODE_UPSCALE_JUMP_FLOOD +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(rgba8ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_positions_half; +layout(rgba8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_positions; +#endif + +#if defined(MODE_JUMPFLOOD) || defined(MODE_JUMPFLOOD_OPTIMIZED) +layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; +layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; +#endif + +#ifdef MODE_JUMPFLOOD_OPTIMIZED + +shared uvec4 group_positions[(GROUP_SIZE + 2) * (GROUP_SIZE + 2) * (GROUP_SIZE + 2)]; //4x4x4 with margins + +void group_store(ivec3 p_pos, uvec4 p_value) { +	uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); +	group_positions[offset] = p_value; +} + +uvec4 
group_load(ivec3 p_pos) { +	uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); +	return group_positions[offset]; +} + +#endif + +#ifdef MODE_OCCLUSION + +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(r8, set = 0, binding = 2) uniform restrict image3D dst_occlusion[8]; +layout(r32ui, set = 0, binding = 3) uniform restrict readonly uimage3D src_facing; + +const uvec2 group_size_offset[11] = uvec2[](uvec2(1, 0), uvec2(3, 1), uvec2(6, 4), uvec2(10, 10), uvec2(15, 20), uvec2(21, 35), uvec2(28, 56), uvec2(36, 84), uvec2(42, 120), uvec2(46, 162), uvec2(48, 208)); +const uint group_pos[256] = uint[](0, +		65536, 256, 1, +		131072, 65792, 512, 65537, 257, 2, +		196608, 131328, 66048, 768, 131073, 65793, 513, 65538, 258, 3, +		262144, 196864, 131584, 66304, 1024, 196609, 131329, 66049, 769, 131074, 65794, 514, 65539, 259, 4, +		327680, 262400, 197120, 131840, 66560, 1280, 262145, 196865, 131585, 66305, 1025, 196610, 131330, 66050, 770, 131075, 65795, 515, 65540, 260, 5, +		393216, 327936, 262656, 197376, 132096, 66816, 1536, 327681, 262401, 197121, 131841, 66561, 1281, 262146, 196866, 131586, 66306, 1026, 196611, 131331, 66051, 771, 131076, 65796, 516, 65541, 261, 6, +		458752, 393472, 328192, 262912, 197632, 132352, 67072, 1792, 393217, 327937, 262657, 197377, 132097, 66817, 1537, 327682, 262402, 197122, 131842, 66562, 1282, 262147, 196867, 131587, 66307, 1027, 196612, 131332, 66052, 772, 131077, 65797, 517, 65542, 262, 7, +		459008, 393728, 328448, 263168, 197888, 132608, 67328, 458753, 393473, 328193, 262913, 197633, 132353, 67073, 1793, 393218, 327938, 262658, 197378, 132098, 66818, 1538, 327683, 262403, 197123, 131843, 66563, 1283, 262148, 196868, 131588, 66308, 1028, 196613, 131333, 66053, 773, 131078, 65798, 518, 65543, 263, +		459264, 393984, 328704, 263424, 198144, 132864, 459009, 393729, 328449, 263169, 197889, 132609, 67329, 458754, 393474, 328194, 262914, 197634, 
132354, 67074, 1794, 393219, 327939, 262659, 197379, 132099, 66819, 1539, 327684, 262404, 197124, 131844, 66564, 1284, 262149, 196869, 131589, 66309, 1029, 196614, 131334, 66054, 774, 131079, 65799, 519, +		459520, 394240, 328960, 263680, 198400, 459265, 393985, 328705, 263425, 198145, 132865, 459010, 393730, 328450, 263170, 197890, 132610, 67330, 458755, 393475, 328195, 262915, 197635, 132355, 67075, 1795, 393220, 327940, 262660, 197380, 132100, 66820, 1540, 327685, 262405, 197125, 131845, 66565, 1285, 262150, 196870, 131590, 66310, 1030, 196615, 131335, 66055, 775); + +shared uint occlusion_facing[((OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2)) / 4]; + +uint get_facing(ivec3 p_pos) { +	uint ofs = uint(p_pos.z * OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2 + p_pos.y * OCCLUSION_SIZE * 2 + p_pos.x); +	uint v = occlusion_facing[ofs / 4]; +	return (v >> ((ofs % 4) * 8)) & 0xFF; +} + +#endif + +#ifdef MODE_STORE + +layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; +layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_albedo; +layout(r8, set = 0, binding = 3) uniform restrict readonly image3D src_occlusion[8]; +layout(r32ui, set = 0, binding = 4) uniform restrict readonly uimage3D src_light; +layout(r32ui, set = 0, binding = 5) uniform restrict readonly uimage3D src_light_aniso; +layout(r32ui, set = 0, binding = 6) uniform restrict readonly uimage3D src_facing; + +layout(r8, set = 0, binding = 7) uniform restrict writeonly image3D dst_sdf; +layout(r16ui, set = 0, binding = 8) uniform restrict writeonly uimage3D dst_occlusion; + +layout(set = 0, binding = 10, std430) restrict buffer DispatchData { +	uint x; +	uint y; +	uint z; +	uint total_count; +} +dispatch_data; + +struct ProcessVoxel { +	uint position; //xyz 7 bit packed, extra 11 bits for neigbours +	uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours +	uint 
light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours +	uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours +	//total neighbours: 26 +}; + +layout(set = 0, binding = 11, std430) restrict buffer writeonly ProcessVoxels { +	ProcessVoxel data[]; +} +dst_process_voxels; + +shared ProcessVoxel store_positions[4 * 4 * 4]; +shared uint store_position_count; +shared uint store_from_index; +#endif + +#ifdef MODE_SCROLL + +layout(r16ui, set = 0, binding = 1) uniform restrict writeonly uimage3D dst_albedo; +layout(r32ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_facing; +layout(r32ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_light; +layout(r32ui, set = 0, binding = 4) uniform restrict writeonly uimage3D dst_light_aniso; + +layout(set = 0, binding = 5, std430) restrict buffer readonly DispatchData { +	uint x; +	uint y; +	uint z; +	uint total_count; +} +dispatch_data; + +struct ProcessVoxel { +	uint position; //xyz 7 bit packed, extra 11 bits for neighbours +	uint albedo; //rgb albedo in bits 0-14, bits 15-20 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbours +	uint light; //rgbe8985 encoded total saved light (low 30 bits), extra 2 bits for neighbours +	uint light_aniso; //six 5-bit light anisotropy values (low 30 bits), extra 2 bits for neighbours +	//total neighbours: 26 +}; + +layout(set = 0, binding = 6, std430) restrict buffer readonly ProcessVoxels { +	ProcessVoxel data[]; +} +src_process_voxels; + +#endif + +#ifdef MODE_SCROLL_OCCLUSION + +layout(r8, set = 0, binding = 1) uniform restrict image3D dst_occlusion[8]; +layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_occlusion; + +#endif + +layout(push_constant, binding = 0, std430) uniform Params { +	ivec3 scroll; + +	int grid_size; + +	ivec3 probe_offset; +	int step_size; + +	bool half_size; +	uint occlusion_index; +	int cascade; +	uint pad; +} +params; + +void main() { +#ifdef MODE_SCROLL + +	// Pixel being shaded +	int index 
= int(gl_GlobalInvocationID.x); +	if (index >= dispatch_data.total_count) { //too big +		return; +	} + +	ivec3 read_pos = (ivec3(src_process_voxels.data[index].position) >> ivec3(0, 7, 14)) & ivec3(0x7F); +	ivec3 write_pos = read_pos + params.scroll; + +	if (any(lessThan(write_pos, ivec3(0))) || any(greaterThanEqual(write_pos, ivec3(params.grid_size)))) { +		return; //fits outside the 3D texture, dont do anything +	} + +	uint albedo = ((src_process_voxels.data[index].albedo & 0x7FFF) << 1) | 1; //add solid bit +	imageStore(dst_albedo, write_pos, uvec4(albedo)); + +	uint facing = (src_process_voxels.data[index].albedo >> 15) & 0x3F; //6 anisotropic facing bits +	imageStore(dst_facing, write_pos, uvec4(facing)); + +	uint light = src_process_voxels.data[index].light & 0x3fffffff; //30 bits of RGBE8985 +	imageStore(dst_light, write_pos, uvec4(light)); + +	uint light_aniso = src_process_voxels.data[index].light_aniso & 0x3fffffff; //30 bits of 6 anisotropic 5 bits values +	imageStore(dst_light_aniso, write_pos, uvec4(light_aniso)); + +#endif + +#ifdef MODE_SCROLL_OCCLUSION + +	ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); +	if (any(greaterThanEqual(pos, ivec3(params.grid_size) - abs(params.scroll)))) { //too large, do nothing +		return; +	} + +	ivec3 read_pos = pos + max(ivec3(0), -params.scroll); +	ivec3 write_pos = pos + max(ivec3(0), params.scroll); + +	read_pos.z += params.cascade * params.grid_size; +	uint occlusion = imageLoad(src_occlusion, read_pos).r; +	read_pos.x += params.grid_size; +	occlusion |= imageLoad(src_occlusion, read_pos).r << 16; + +	const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); + +	for (uint i = 0; i < 8; i++) { +		float o = float((occlusion >> occlusion_shift[i]) & 0xF) / 15.0; +		imageStore(dst_occlusion[i], write_pos, vec4(o)); +	} + +#endif + +#ifdef MODE_INITIALIZE_JUMP_FLOOD + +	ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + +	uint c = imageLoad(src_color, pos).r; +	uvec4 v; +	if (bool(c & 0x1)) { +		//bit set means 
this is solid +		v.xyz = uvec3(pos); +		v.w = 255; //not zero means used +	} else { +		v.xyz = uvec3(0); +		v.w = 0; // zero means unused +	} + +	imageStore(dst_positions, pos, v); +#endif + +#ifdef MODE_INITIALIZE_JUMP_FLOOD_HALF + +	ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); +	ivec3 base_pos = pos * 2; + +	//since we store in half size, lets kind of randomize what we store, so +	//the half size jump flood has a bit better chance to find something +	uvec4 closest[8]; +	int closest_count = 0; + +	for (uint i = 0; i < 8; i++) { +		ivec3 src_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); +		uint c = imageLoad(src_color, src_pos).r; +		if (bool(c & 1)) { +			uvec4 v = uvec4(uvec3(src_pos), 255); +			closest[closest_count] = v; +			closest_count++; +		} +	} + +	if (closest_count == 0) { +		imageStore(dst_positions, pos, uvec4(0)); +	} else { +		ivec3 indexv = (pos & ivec3(1, 1, 1)) * ivec3(1, 2, 4); +		int index = (indexv.x | indexv.y | indexv.z) % closest_count; +		imageStore(dst_positions, pos, closest[index]); +	} + +#endif + +#ifdef MODE_JUMPFLOOD + +	//regular jumpflood, efficent for large steps, inefficient for small steps +	ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + +	vec3 posf = vec3(pos); + +	if (params.half_size) { +		posf = posf * 2.0 + 0.5; +	} + +	uvec4 p = imageLoad(src_positions, pos); + +	if (!params.half_size && p == uvec4(uvec3(pos), 255)) { +		imageStore(dst_positions, pos, p); +		return; //points to itself and valid, nothing better can be done, just pass +	} + +	float p_dist; + +	if (p.w != 0) { +		p_dist = distance(posf, vec3(p.xyz)); +	} else { +		p_dist = 0.0; //should not matter +	} + +	const uint offset_count = 26; +	const ivec3 offsets[offset_count] = ivec3[]( +			ivec3(-1, -1, -1), +			ivec3(-1, -1, 0), +			ivec3(-1, -1, 1), +			ivec3(-1, 0, -1), +			ivec3(-1, 0, 0), +			ivec3(-1, 0, 1), +			ivec3(-1, 1, -1), +			ivec3(-1, 1, 0), +			ivec3(-1, 1, 1), +			ivec3(0, -1, -1), +			ivec3(0, -1, 0), +			ivec3(0, -1, 1), +		
	ivec3(0, 0, -1), +			ivec3(0, 0, 1), +			ivec3(0, 1, -1), +			ivec3(0, 1, 0), +			ivec3(0, 1, 1), +			ivec3(1, -1, -1), +			ivec3(1, -1, 0), +			ivec3(1, -1, 1), +			ivec3(1, 0, -1), +			ivec3(1, 0, 0), +			ivec3(1, 0, 1), +			ivec3(1, 1, -1), +			ivec3(1, 1, 0), +			ivec3(1, 1, 1)); + +	//test every neighbour at step_size distance and keep whichever seed is closest to this cell +	for (uint i = 0; i < offset_count; i++) { +		ivec3 ofs = pos + offsets[i] * params.step_size; +		if (any(lessThan(ofs, ivec3(0))) || any(greaterThanEqual(ofs, ivec3(params.grid_size)))) { +			continue; +		} +		uvec4 q = imageLoad(src_positions, ofs); + +		if (q.w == 0) { +			continue; //was not initialized yet, ignore +		} + +		//distance must be measured to the candidate seed (q), not to the current best (p), +		//otherwise a closer seed is never picked up and p_dist ends up meaningless +		float q_dist = distance(posf, vec3(q.xyz)); +		if (p.w == 0 || q_dist < p_dist) { +			p = q; //replace because current is unused or the candidate is closer +			p_dist = q_dist; +		} +	} + +	imageStore(dst_positions, pos, p); +#endif + +#ifdef MODE_JUMPFLOOD_OPTIMIZED +	//optimized version using shared compute memory + +	ivec3 group_offset = ivec3(gl_WorkGroupID.xyz) % params.step_size; +	ivec3 group_pos = group_offset + (ivec3(gl_WorkGroupID.xyz) / params.step_size) * ivec3(GROUP_SIZE * params.step_size); + +	//load data into local group memory + +	if (all(lessThan(ivec3(gl_LocalInvocationID.xyz), ivec3((GROUP_SIZE + 2) / 2)))) { +		//use this thread for loading, this method uses less threads for this but its simpler and less divergent +		ivec3 base_pos = ivec3(gl_LocalInvocationID.xyz) * 2; +		for (uint i = 0; i < 8; i++) { +			ivec3 load_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); +			ivec3 load_global_pos = group_pos + (load_pos - ivec3(1)) * params.step_size; +			uvec4 q; +			if (all(greaterThanEqual(load_global_pos, ivec3(0))) && all(lessThan(load_global_pos, ivec3(params.grid_size)))) { +				q = imageLoad(src_positions, load_global_pos); +			} else { +				q = uvec4(0); //unused +			} + +			group_store(load_pos, q); +		} +	} + +	ivec3 global_pos = group_pos + ivec3(gl_LocalInvocationID.xyz) * params.step_size; + +	if (any(lessThan(global_pos, 
ivec3(0))) || any(greaterThanEqual(global_pos, ivec3(params.grid_size)))) { +		return; //do nothing else, end here because outside range +	} + +	//sync +	groupMemoryBarrier(); +	barrier(); + +	ivec3 local_pos = ivec3(gl_LocalInvocationID.xyz) + ivec3(1); + +	const uint offset_count = 27; +	const ivec3 offsets[offset_count] = ivec3[]( +			ivec3(-1, -1, -1), +			ivec3(-1, -1, 0), +			ivec3(-1, -1, 1), +			ivec3(-1, 0, -1), +			ivec3(-1, 0, 0), +			ivec3(-1, 0, 1), +			ivec3(-1, 1, -1), +			ivec3(-1, 1, 0), +			ivec3(-1, 1, 1), +			ivec3(0, -1, -1), +			ivec3(0, -1, 0), +			ivec3(0, -1, 1), +			ivec3(0, 0, -1), +			ivec3(0, 0, 0), +			ivec3(0, 0, 1), +			ivec3(0, 1, -1), +			ivec3(0, 1, 0), +			ivec3(0, 1, 1), +			ivec3(1, -1, -1), +			ivec3(1, -1, 0), +			ivec3(1, -1, 1), +			ivec3(1, 0, -1), +			ivec3(1, 0, 0), +			ivec3(1, 0, 1), +			ivec3(1, 1, -1), +			ivec3(1, 1, 0), +			ivec3(1, 1, 1)); + +	//only makes sense if point is inside screen +	uvec4 closest = uvec4(0); +	float closest_dist = 0.0; + +	vec3 posf = vec3(global_pos); + +	if (params.half_size) { +		posf = posf * 2.0 + 0.5; +	} + +	for (uint i = 0; i < offset_count; i++) { +		uvec4 point = group_load(local_pos + offsets[i]); + +		if (point.w == 0) { +			continue; //was not initialized yet, ignore +		} + +		float dist = distance(posf, vec3(point.xyz)); +		if (closest.w == 0 || dist < closest_dist) { +			closest = point; +			closest_dist = dist; +		} +	} + +	imageStore(dst_positions, global_pos, closest); + +#endif + +#ifdef MODE_UPSCALE_JUMP_FLOOD + +	ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + +	uint c = imageLoad(src_color, pos).r; +	uvec4 v; +	if (bool(c & 1)) { +		//bit set means this is solid +		v.xyz = uvec3(pos); +		v.w = 255; //not zero means used +	} else { +		v = imageLoad(src_positions_half, pos >> 1); +		float d = length(vec3(ivec3(v.xyz) - pos)); + +		ivec3 vbase = ivec3(v.xyz - (v.xyz & uvec3(1))); + +		//search around if there is a better candidate from the same block +		for (int i = 0; i 
< 8; i++) { +			ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); +			ivec3 p = vbase + bits; + +			float d2 = length(vec3(p - pos)); +			if (d2 < d) { //check valid distance before test so we avoid a read +				uint c2 = imageLoad(src_color, p).r; +				if (bool(c2 & 1)) { +					v.xyz = uvec3(p); +					d = d2; +				} +			} +		} + +		//could validate better position.. +	} + +	imageStore(dst_positions, pos, v); + +#endif + +#ifdef MODE_OCCLUSION + +	uint invocation_idx = uint(gl_LocalInvocationID.x); +	ivec3 region = ivec3(gl_WorkGroupID); + +	ivec3 region_offset = -ivec3(OCCLUSION_SIZE); +	region_offset += region * OCCLUSION_SIZE * 2; +	region_offset += params.probe_offset * OCCLUSION_SIZE; + +	if (params.scroll != ivec3(0)) { +		//validate scroll region +		ivec3 region_offset_to = region_offset + ivec3(OCCLUSION_SIZE * 2); +		uvec3 scroll_mask = uvec3(notEqual(params.scroll, ivec3(0))); //save which axes acre scrolling +		ivec3 scroll_from = mix(ivec3(0), ivec3(params.grid_size) + params.scroll, lessThan(params.scroll, ivec3(0))); +		ivec3 scroll_to = mix(ivec3(params.grid_size), params.scroll, greaterThan(params.scroll, ivec3(0))); + +		if ((uvec3(lessThanEqual(region_offset_to, scroll_from)) | uvec3(greaterThanEqual(region_offset, scroll_to))) * scroll_mask == scroll_mask) { //all axes that scroll are out, exit +			return; //region outside scroll bounds, quit +		} +	} + +#define OCC_HALF_SIZE (OCCLUSION_SIZE / 2) + +	ivec3 local_ofs = ivec3(uvec3(invocation_idx % OCC_HALF_SIZE, (invocation_idx % (OCC_HALF_SIZE * OCC_HALF_SIZE)) / OCC_HALF_SIZE, invocation_idx / (OCC_HALF_SIZE * OCC_HALF_SIZE))) * 4; + +	/*	for(int i=0;i<64;i++) { +		ivec3 offset = region_offset + local_ofs + ((ivec3(i) >> ivec3(0,2,4)) & ivec3(3,3,3)); +		uint facig = +		if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) {*/ + +	for (int i = 0; i < 16; i++) { //skip x, so it can be packed + +		ivec3 offset = local_ofs + ((ivec3(i * 4) >> 
ivec3(0, 2, 4)) & ivec3(3, 3, 3)); + +		uint facing_pack = 0; +		for (int j = 0; j < 4; j++) { +			ivec3 foffset = region_offset + offset + ivec3(j, 0, 0); +			if (all(greaterThanEqual(foffset, ivec3(0))) && all(lessThan(foffset, ivec3(params.grid_size)))) { +				uint f = imageLoad(src_facing, foffset).r; +				facing_pack |= f << (j * 8); +			} +		} + +		occlusion_facing[(offset.z * (OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2) + offset.y * (OCCLUSION_SIZE * 2) + offset.x) / 4] = facing_pack; +	} + +	//sync occlusion saved +	groupMemoryBarrier(); +	barrier(); + +	//process occlusion + +#define OCC_STEPS (OCCLUSION_SIZE * 3 - 2) +#define OCC_HALF_STEPS (OCC_STEPS / 2) + +	for (int step = 0; step < OCC_STEPS; step++) { +		bool shrink = step >= OCC_HALF_STEPS; +		int occ_step = shrink ? OCC_HALF_STEPS - (step - OCC_HALF_STEPS) - 1 : step; + +		if (invocation_idx < group_size_offset[occ_step].x) { +			uint pv = group_pos[group_size_offset[occ_step].y + invocation_idx]; +			ivec3 proc_abs = (ivec3(int(pv)) >> ivec3(0, 8, 16)) & ivec3(0xFF); + +			if (shrink) { +				proc_abs = ivec3(OCCLUSION_SIZE) - proc_abs - ivec3(1); +			} + +			for (int i = 0; i < 8; i++) { +				ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); +				ivec3 proc_sign = bits * 2 - 1; +				ivec3 local_offset = ivec3(OCCLUSION_SIZE) + proc_abs * proc_sign - (ivec3(1) - bits); +				ivec3 offset = local_offset + region_offset; +				if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { +					float occ; + +					uint facing = get_facing(local_offset); + +					if (facing != 0) { //solid +						occ = 0.0; +					} else if (step == 0) { +#if 0 +						occ = 0.0; +						if (get_facing(local_offset - ivec3(proc_sign.x,0,0))==0) { +							occ+=1.0; +						} +						if (get_facing(local_offset - ivec3(0,proc_sign.y,0))==0) { +							occ+=1.0; +						} +						if (get_facing(local_offset - ivec3(0,0,proc_sign.z))==0) { +							occ+=1.0; +						} +						/* +						if 
(get_facing(local_offset - proc_sign)==0) { +							occ+=1.0; +						}*/ + +						occ/=3.0; +#endif +						occ = 1.0; + +					} else { +						ivec3 read_dir = -proc_sign; + +						ivec3 major_axis; +						if (proc_abs.x < proc_abs.y) { +							if (proc_abs.z < proc_abs.y) { +								major_axis = ivec3(0, 1, 0); +							} else { +								major_axis = ivec3(0, 0, 1); +							} +						} else { +							if (proc_abs.z < proc_abs.x) { +								major_axis = ivec3(1, 0, 0); +							} else { +								major_axis = ivec3(0, 0, 1); +							} +						} + +						float avg = 0.0; +						occ = 0.0; + +						ivec3 read_x = offset + ivec3(read_dir.x, 0, 0) + (proc_abs.x == 0 ? major_axis * read_dir : ivec3(0)); +						ivec3 read_y = offset + ivec3(0, read_dir.y, 0) + (proc_abs.y == 0 ? major_axis * read_dir : ivec3(0)); +						ivec3 read_z = offset + ivec3(0, 0, read_dir.z) + (proc_abs.z == 0 ? major_axis * read_dir : ivec3(0)); + +						uint facing_x = get_facing(read_x - region_offset); +						if (facing_x == 0) { +							if (all(greaterThanEqual(read_x, ivec3(0))) && all(lessThan(read_x, ivec3(params.grid_size)))) { +								occ += imageLoad(dst_occlusion[params.occlusion_index], read_x).r; +								avg += 1.0; +							} +						} else { +							if (proc_abs.x != 0) { //do not occlude from voxels in the opposite octant +								avg += 1.0; +							} +						} + +						uint facing_y = get_facing(read_y - region_offset); +						if (facing_y == 0) { +							if (all(greaterThanEqual(read_y, ivec3(0))) && all(lessThan(read_y, ivec3(params.grid_size)))) { +								occ += imageLoad(dst_occlusion[params.occlusion_index], read_y).r; +								avg += 1.0; +							} +						} else { +							if (proc_abs.y != 0) { +								avg += 1.0; +							} +						} + +						uint facing_z = get_facing(read_z - region_offset); +						if (facing_z == 0) { +							if (all(greaterThanEqual(read_z, ivec3(0))) && all(lessThan(read_z, ivec3(params.grid_size)))) { +								occ += 
imageLoad(dst_occlusion[params.occlusion_index], read_z).r; +								avg += 1.0; +							} +						} else { +							if (proc_abs.z != 0) { +								avg += 1.0; +							} +						} + +						if (avg > 0.0) { +							occ /= avg; +						} +					} + +					imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); +				} +			} +		} + +		groupMemoryBarrier(); +		barrier(); +	} +#if 1 +	//bias solid voxels away + +	for (int i = 0; i < 64; i++) { +		ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); +		ivec3 offset = region_offset + local_offset; + +		if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { +			uint facing = get_facing(local_offset); + +			if (facing != 0) { +				//only work on solids + +				ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE); +				proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); + +				float avg = 0.0; +				float occ = 0.0; + +				ivec3 read_dir = -sign(proc_pos); +				ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); +				ivec3 read_dir_y = ivec3(0, read_dir.y, 0); +				ivec3 read_dir_z = ivec3(0, 0, read_dir.z); +				//solid +#if 0 + +				uvec3 facing_pos_base = (uvec3(facing) >> uvec3(0,1,2)) & uvec3(1,1,1); +				uvec3 facing_neg_base = (uvec3(facing) >> uvec3(3,4,5)) & uvec3(1,1,1); +				uvec3 facing_pos=  facing_pos_base &((~facing_neg_base)&uvec3(1,1,1)); +				uvec3 facing_neg=  facing_neg_base &((~facing_pos_base)&uvec3(1,1,1)); +#else +				uvec3 facing_pos = (uvec3(facing) >> uvec3(0, 1, 2)) & uvec3(1, 1, 1); +				uvec3 facing_neg = (uvec3(facing) >> uvec3(3, 4, 5)) & uvec3(1, 1, 1); +#endif +				bvec3 read_valid = bvec3(mix(facing_neg, facing_pos, greaterThan(read_dir, ivec3(0)))); + +				//sides +				if (read_valid.x) { +					ivec3 read_offset = local_offset + read_dir_x; +					uint f = get_facing(read_offset); +					if (f == 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && 
all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; +							avg += 1.0; +						} +					} +				} + +				if (read_valid.y) { +					ivec3 read_offset = local_offset + read_dir_y; +					uint f = get_facing(read_offset); +					if (f == 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; +							avg += 1.0; +						} +					} +				} + +				if (read_valid.z) { +					ivec3 read_offset = local_offset + read_dir_z; +					uint f = get_facing(read_offset); +					if (f == 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; +							avg += 1.0; +						} +					} +				} + +				//adjacents + +				if (all(read_valid.yz)) { +					ivec3 read_offset = local_offset + read_dir_y + read_dir_z; +					uint f = get_facing(read_offset); +					if (f == 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; +							avg += 1.0; +						} +					} +				} + +				if (all(read_valid.xz)) { +					ivec3 read_offset = local_offset + read_dir_x + read_dir_z; +					uint f = get_facing(read_offset); +					if (f == 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; +							avg += 1.0; +						} +					} +				} + +				if (all(read_valid.xy)) { +					ivec3 read_offset = local_offset + read_dir_x + read_dir_y; +				
	uint f = get_facing(read_offset); +					if (f == 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; +							avg += 1.0; +						} +					} +				} + +				//diagonal + +				if (all(read_valid)) { +					ivec3 read_offset = local_offset + read_dir; +					uint f = get_facing(read_offset); +					if (f == 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; +							avg += 1.0; +						} +					} +				} + +				if (avg > 0.0) { +					occ /= avg; +				} + +				imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); +			} +		} +	} + +#endif + +#if 1 +	groupMemoryBarrier(); +	barrier(); + +	for (int i = 0; i < 64; i++) { +		ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); +		ivec3 offset = region_offset + local_offset; + +		if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { +			uint facing = get_facing(local_offset); + +			if (facing == 0) { +				ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE); +				proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); + +				ivec3 proc_abs = abs(proc_pos); + +				ivec3 read_dir = sign(proc_pos); //opposite direction +				ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); +				ivec3 read_dir_y = ivec3(0, read_dir.y, 0); +				ivec3 read_dir_z = ivec3(0, 0, read_dir.z); +				//solid +				uvec3 read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match positive with negative normals +				uvec3 block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match positive with negative normals + +				block_mask = uvec3(0); + +	
			float visible = 0.0; +				float occlude_total = 0.0; + +				if (proc_abs.x < OCCLUSION_SIZE) { +					ivec3 read_offset = local_offset + read_dir_x; +					uint x_mask = get_facing(read_offset); +					if (x_mask != 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occlude_total += 1.0; +							if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { +								visible += 1.0; +							} +						} +					} +				} + +				if (proc_abs.y < OCCLUSION_SIZE) { +					ivec3 read_offset = local_offset + read_dir_y; +					uint y_mask = get_facing(read_offset); +					if (y_mask != 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occlude_total += 1.0; +							if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { +								visible += 1.0; +							} +						} +					} +				} + +				if (proc_abs.z < OCCLUSION_SIZE) { +					ivec3 read_offset = local_offset + read_dir_z; +					uint z_mask = get_facing(read_offset); +					if (z_mask != 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occlude_total += 1.0; +							if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { +								visible += 1.0; +							} +						} +					} +				} + +				//if near the cartesian plane, test in opposite direction too + +				read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match negative with positive normals +				block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match negative with positive normals +				block_mask = uvec3(0); + +				if (proc_abs.x == 1) { +					ivec3 read_offset = local_offset - read_dir_x; +					uint x_mask = get_facing(read_offset); +					if (x_mask != 0) { 
+						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occlude_total += 1.0; +							if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { +								visible += 1.0; +							} +						} +					} +				} + +				if (proc_abs.y == 1) { +					ivec3 read_offset = local_offset - read_dir_y; +					uint y_mask = get_facing(read_offset); +					if (y_mask != 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occlude_total += 1.0; +							if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { +								visible += 1.0; +							} +						} +					} +				} + +				if (proc_abs.z == 1) { +					ivec3 read_offset = local_offset - read_dir_z; +					uint z_mask = get_facing(read_offset); +					if (z_mask != 0) { +						read_offset += region_offset; +						if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { +							occlude_total += 1.0; +							if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { +								visible += 1.0; +							} +						} +					} +				} + +				if (occlude_total > 0.0) { +					float occ = imageLoad(dst_occlusion[params.occlusion_index], offset).r; +					occ *= visible / occlude_total; +					imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); +				} +			} +		} +	} + +#endif + +	/* +	for(int i=0;i<8;i++) { +		ivec3 local_offset = local_pos + ((ivec3(i) >> ivec3(2,1,0)) & ivec3(1,1,1)) * OCCLUSION_SIZE; +		ivec3 offset = local_offset - ivec3(OCCLUSION_SIZE); //looking around probe, so starts negative +		offset += region * OCCLUSION_SIZE * 2; //offset by region +		offset += params.probe_offset * OCCLUSION_SIZE; // offset by probe offset +		if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) { +			
imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_data[ to_linear(local_offset) ]  )); +			//imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_solid[ to_linear(local_offset) ] )); +		} +	} +*/ + +#endif + +#ifdef MODE_STORE + +	ivec3 local = ivec3(gl_LocalInvocationID.xyz); +	ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); +	// store SDF +	uvec4 p = imageLoad(src_positions, pos); + +	bool solid = false; +	float d; +	if (ivec3(p.xyz) == pos) { +		//solid block +		d = 0; +		solid = true; +	} else { +		//distance block +		d = 1.0 + length(vec3(p.xyz) - vec3(pos)); +	} + +	d /= 255.0; + +	imageStore(dst_sdf, pos, vec4(d)); + +	// STORE OCCLUSION + +	uint occlusion = 0; +	const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); +	for (int i = 0; i < 8; i++) { +		float occ = imageLoad(src_occlusion[i], pos).r; +		occlusion |= uint(clamp(occ * 15.0, 0.0, 15.0)) << occlusion_shift[i]; +	} +	{ +		ivec3 occ_pos = pos; +		occ_pos.z += params.cascade * params.grid_size; +		imageStore(dst_occlusion, occ_pos, uvec4(occlusion & 0xFFFF)); +		occ_pos.x += params.grid_size; +		imageStore(dst_occlusion, occ_pos, uvec4(occlusion >> 16)); +	} + +	// STORE POSITIONS + +	if (local == ivec3(0)) { +		store_position_count = 0; //base one stores as zero, the others wait +	} + +	groupMemoryBarrier(); +	barrier(); + +	if (solid) { +		uint index = atomicAdd(store_position_count, 1); +		// At least do the conversion work in parallel +		store_positions[index].position = uint(pos.x | (pos.y << 7) | (pos.z << 14)); + +		//see around which voxels point to this one, add them to the list +		uint bit_index = 0; +		uint neighbour_bits = 0; +		for (int i = -1; i <= 1; i++) { +			for (int j = -1; j <= 1; j++) { +				for (int k = -1; k <= 1; k++) { +					if (i == 0 && j == 0 && k == 0) { +						continue; +					} +					ivec3 npos = pos + ivec3(i, j, k); +					if (all(greaterThanEqual(npos, ivec3(0))) && all(lessThan(npos, ivec3(params.grid_size)))) { +			
			p = imageLoad(src_positions, npos); +						if (ivec3(p.xyz) == pos) { +							neighbour_bits |= (1 << bit_index); +						} +					} +					bit_index++; +				} +			} +		} + +		uint rgb = imageLoad(src_albedo, pos).r; +		uint facing = imageLoad(src_facing, pos).r; + +		store_positions[index].albedo = rgb >> 1; //store as it comes (555) to avoid precision loss (and move away the alpha bit) +		store_positions[index].albedo |= (facing & 0x3F) << 15; // store facing in bits 15-21 + +		store_positions[index].albedo |= neighbour_bits << 21; //store lower 11 bits of neighbours with remaining albedo +		store_positions[index].position |= (neighbour_bits >> 11) << 21; //store 11 bits more of neighbours with position + +		store_positions[index].light = imageLoad(src_light, pos).r; +		store_positions[index].light_aniso = imageLoad(src_light_aniso, pos).r; +		//add neighbours +		store_positions[index].light |= (neighbour_bits >> 22) << 30; //store 2 bits more of neighbours with light +		store_positions[index].light_aniso |= (neighbour_bits >> 24) << 30; //store 2 bits more of neighbours with aniso +	} + +	groupMemoryBarrier(); +	barrier(); + +	// global increment only once per group, to reduce pressure + +	if (local == ivec3(0) && store_position_count > 0) { +		store_from_index = atomicAdd(dispatch_data.total_count, store_position_count); +		uint group_count = (store_from_index + store_position_count - 1) / 64 + 1; +		atomicMax(dispatch_data.x, group_count); +	} + +	groupMemoryBarrier(); +	barrier(); + +	uint read_index = uint(local.z * 4 * 4 + local.y * 4 + local.x); +	uint write_index = store_from_index + read_index; + +	if (read_index < store_position_count) { +		dst_process_voxels.data[write_index] = store_positions[read_index]; +	} + +	if (pos == ivec3(0)) { +		//this thread clears y and z +		dispatch_data.y = 1; +		dispatch_data.z = 1; +	} +#endif +} diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 55b65d2747..7c70148180 
100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -212,10 +212,14 @@ RID RenderingDevice::_render_pipeline_create(RID p_shader, FramebufferFormatID p  	return render_pipeline_create(p_shader, p_framebuffer_format, p_vertex_format, p_render_primitive, rasterization_state, multisample_state, depth_stencil_state, color_blend_state, p_dynamic_state_flags);  } -Vector<int64_t> RenderingDevice::_draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region) { +Vector<int64_t> RenderingDevice::_draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const TypedArray<RID> &p_storage_textures) {  	Vector<DrawListID> splits;  	splits.resize(p_splits); -	draw_list_begin_split(p_framebuffer, p_splits, splits.ptrw(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region); +	Vector<RID> stextures; +	for (int i = 0; i < p_storage_textures.size(); i++) { +		stextures.push_back(p_storage_textures[i]); +	} +	draw_list_begin_split(p_framebuffer, p_splits, splits.ptrw(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, stextures);  	Vector<int64_t> split_ids;  	split_ids.resize(splits.size()); @@ -236,6 +240,10 @@ void RenderingDevice::_compute_list_set_push_constant(ComputeListID p_list, cons  	
compute_list_set_push_constant(p_list, p_data.ptr(), p_data_size);  } +void RenderingDevice::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group) { +	compute_list_dispatch(p_list, (p_x_threads - 1) / p_x_local_group + 1, (p_y_threads - 1) / p_y_local_group + 1, (p_z_threads - 1) / p_z_local_group + 1); +} +  void RenderingDevice::_bind_methods() {  	ClassDB::bind_method(D_METHOD("texture_create", "format", "view", "data"), &RenderingDevice::_texture_create, DEFVAL(Array()));  	ClassDB::bind_method(D_METHOD("texture_create_shared", "view", "with_texture"), &RenderingDevice::_texture_create_shared); @@ -254,8 +262,10 @@ void RenderingDevice::_bind_methods() {  	ClassDB::bind_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "sync_with_draw"), &RenderingDevice::texture_resolve_multisample, DEFVAL(false));  	ClassDB::bind_method(D_METHOD("framebuffer_format_create", "attachments"), &RenderingDevice::_framebuffer_format_create); +	ClassDB::bind_method(D_METHOD("framebuffer_format_create_empty", "size"), &RenderingDevice::framebuffer_format_create_empty);  	ClassDB::bind_method(D_METHOD("framebuffer_format_get_texture_samples", "format"), &RenderingDevice::framebuffer_format_get_texture_samples);  	ClassDB::bind_method(D_METHOD("framebuffer_create", "textures", "validate_with_format"), &RenderingDevice::_framebuffer_create, DEFVAL(INVALID_FORMAT_ID)); +	ClassDB::bind_method(D_METHOD("framebuffer_create_empty", "size", "validate_with_format"), &RenderingDevice::framebuffer_create_empty, DEFVAL(INVALID_FORMAT_ID));  	ClassDB::bind_method(D_METHOD("framebuffer_get_format", "framebuffer"), &RenderingDevice::framebuffer_get_format);  	ClassDB::bind_method(D_METHOD("sampler_create", "state"), &RenderingDevice::_sampler_create); @@ -292,8 +302,8 @@ void RenderingDevice::_bind_methods() {  	
ClassDB::bind_method(D_METHOD("draw_list_begin_for_screen", "screen", "clear_color"), &RenderingDevice::draw_list_begin_for_screen, DEFVAL(DisplayServer::MAIN_WINDOW_ID), DEFVAL(Color())); -	ClassDB::bind_method(D_METHOD("draw_list_begin", "framebuffer", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region"), &RenderingDevice::draw_list_begin, DEFVAL(Vector<Color>()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2i())); -	ClassDB::bind_method(D_METHOD("draw_list_begin_split", "framebuffer", "splits", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region"), &RenderingDevice::_draw_list_begin_split, DEFVAL(Vector<Color>()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2i())); +	ClassDB::bind_method(D_METHOD("draw_list_begin", "framebuffer", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region", "storage_textures"), &RenderingDevice::draw_list_begin, DEFVAL(Vector<Color>()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2i()), DEFVAL(TypedArray<RID>())); +	ClassDB::bind_method(D_METHOD("draw_list_begin_split", "framebuffer", "splits", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region", "storage_textures"), &RenderingDevice::_draw_list_begin_split, DEFVAL(Vector<Color>()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2i()), DEFVAL(TypedArray<RID>()));  	ClassDB::bind_method(D_METHOD("draw_list_bind_render_pipeline", "draw_list", "render_pipeline"), &RenderingDevice::draw_list_bind_render_pipeline);  	ClassDB::bind_method(D_METHOD("draw_list_bind_uniform_set", "draw_list", "uniform_set", "set_index"), &RenderingDevice::draw_list_bind_uniform_set); @@ -625,6 +635,8 @@ void RenderingDevice::_bind_methods() {  	
BIND_ENUM_CONSTANT(INDEX_BUFFER_FORMAT_UINT16);  	BIND_ENUM_CONSTANT(INDEX_BUFFER_FORMAT_UINT32); +	BIND_ENUM_CONSTANT(STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); +  	BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER); //for sampling only (sampler GLSL type)  	BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER_WITH_TEXTURE); // for sampling only); but includes a texture); (samplerXX GLSL type)); first a sampler then a texture  	BIND_ENUM_CONSTANT(UNIFORM_TYPE_TEXTURE); //only texture); (textureXX GLSL type) diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index ee39ee11ed..72afc7c621 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -467,9 +467,11 @@ public:  	// This ID is warranted to be unique for the same formats, does not need to be freed  	virtual FramebufferFormatID framebuffer_format_create(const Vector<AttachmentFormat> &p_format) = 0; +	virtual FramebufferFormatID framebuffer_format_create_empty(const Size2i &p_size) = 0;  	virtual TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format) = 0;  	virtual RID framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check = INVALID_ID) = 0; +	virtual RID framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check = INVALID_ID) = 0;  	virtual FramebufferFormatID framebuffer_get_format(RID p_framebuffer) = 0; @@ -618,8 +620,12 @@ public:  		UNIFORM_TYPE_MAX  	}; +	enum StorageBufferUsage { +		STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT = 1 +	}; +  	virtual RID uniform_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>()) = 0; -	virtual RID storage_buffer_create(uint32_t p_size, const Vector<uint8_t> &p_data = Vector<uint8_t>()) = 0; +	virtual RID storage_buffer_create(uint32_t p_size, const Vector<uint8_t> &p_data = Vector<uint8_t>(), uint32_t p_usage = 0) = 0;  	virtual RID texture_buffer_create(uint32_t p_size_elements, DataFormat 
p_format, const Vector<uint8_t> &p_data = Vector<uint8_t>()) = 0;  	struct Uniform { @@ -940,8 +946,8 @@ public:  	typedef int64_t DrawListID;  	virtual DrawListID draw_list_begin_for_screen(DisplayServer::WindowID p_screen = 0, const Color &p_clear_color = Color()) = 0; -	virtual DrawListID draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2()) = 0; -	virtual Error draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2()) = 0; +	virtual DrawListID draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector<RID> &p_storage_textures = Vector<RID>()) = 0; +	virtual Error draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector<RID> &p_storage_textures = Vector<RID>()) = 0;  	virtual void draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline) = 0;  	virtual 
void draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index) = 0; @@ -968,10 +974,14 @@ public:  	virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) = 0;  	virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) = 0;  	virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0; +	virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group); +	virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) = 0;  	virtual void compute_list_add_barrier(ComputeListID p_list) = 0;  	virtual void compute_list_end() = 0; +	virtual void full_barrier() = 0; +  	/***************/  	/**** FREE! ****/  	/***************/ @@ -1070,7 +1080,7 @@ protected:  	RID _render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const Ref<RDPipelineRasterizationState> &p_rasterization_state, const Ref<RDPipelineMultisampleState> &p_multisample_state, const Ref<RDPipelineDepthStencilState> &p_depth_stencil_state, const Ref<RDPipelineColorBlendState> &p_blend_state, int p_dynamic_state_flags = 0); -	Vector<int64_t> _draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2()); +	Vector<int64_t> _draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction 
p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const TypedArray<RID> &p_storage_textures = TypedArray<RID>());  	void _draw_list_set_push_constant(DrawListID p_list, const Vector<uint8_t> &p_data, uint32_t p_data_size);  	void _compute_list_set_push_constant(ComputeListID p_list, const Vector<uint8_t> &p_data, uint32_t p_data_size);  }; @@ -1089,6 +1099,7 @@ VARIANT_ENUM_CAST(RenderingDevice::SamplerRepeatMode)  VARIANT_ENUM_CAST(RenderingDevice::SamplerBorderColor)  VARIANT_ENUM_CAST(RenderingDevice::VertexFrequency)  VARIANT_ENUM_CAST(RenderingDevice::IndexBufferFormat) +VARIANT_ENUM_CAST(RenderingDevice::StorageBufferUsage)  VARIANT_ENUM_CAST(RenderingDevice::UniformType)  VARIANT_ENUM_CAST(RenderingDevice::RenderPrimitive)  VARIANT_ENUM_CAST(RenderingDevice::PolygonCullMode) diff --git a/servers/rendering/rendering_server_raster.cpp b/servers/rendering/rendering_server_raster.cpp index d30160702b..b12e2ff3c1 100644 --- a/servers/rendering/rendering_server_raster.cpp +++ b/servers/rendering/rendering_server_raster.cpp @@ -222,6 +222,10 @@ bool RenderingServerRaster::has_feature(Features p_feature) const {  	return false;  } +void RenderingServerRaster::sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir) { +	RSG::scene_render->sdfgi_set_debug_probe_select(p_position, p_dir); +} +  RID RenderingServerRaster::get_test_cube() {  	if (!test_cube.is_valid()) {  		test_cube = _make_test_cube(); diff --git a/servers/rendering/rendering_server_raster.h b/servers/rendering/rendering_server_raster.h index 8a3c55118d..27fc6b6f07 100644 --- a/servers/rendering/rendering_server_raster.h +++ b/servers/rendering/rendering_server_raster.h @@ -322,7 +322,8 @@ public:  	BIND2(light_set_negative, RID, bool)  	BIND2(light_set_cull_mask, RID, uint32_t)  	BIND2(light_set_reverse_cull_face_mode, RID, 
bool) -	BIND2(light_set_use_gi, RID, bool) +	BIND2(light_set_bake_mode, RID, LightBakeMode) +	BIND2(light_set_max_sdfgi_cascade, RID, uint32_t)  	BIND2(light_omni_set_shadow_mode, RID, LightOmniShadowMode) @@ -336,9 +337,9 @@ public:  	BIND2(reflection_probe_set_update_mode, RID, ReflectionProbeUpdateMode)  	BIND2(reflection_probe_set_intensity, RID, float) -	BIND2(reflection_probe_set_interior_ambient, RID, const Color &) -	BIND2(reflection_probe_set_interior_ambient_energy, RID, float) -	BIND2(reflection_probe_set_interior_ambient_probe_contribution, RID, float) +	BIND2(reflection_probe_set_ambient_color, RID, const Color &) +	BIND2(reflection_probe_set_ambient_energy, RID, float) +	BIND2(reflection_probe_set_ambient_mode, RID, ReflectionProbeAmbientMode)  	BIND2(reflection_probe_set_max_distance, RID, float)  	BIND2(reflection_probe_set_extents, RID, const Vector3 &)  	BIND2(reflection_probe_set_origin_offset, RID, const Vector3 &) @@ -523,6 +524,7 @@ public:  #define BINDBASE RSG::scene_render  	BIND1(directional_shadow_atlas_set_size, int) +	BIND1(gi_probe_set_quality, GIProbeQuality)  	/* SKY API */ @@ -564,9 +566,13 @@ public:  	BIND7(environment_set_fog_depth, RID, bool, float, float, float, bool, float)  	BIND5(environment_set_fog_height, RID, bool, float, float, float) +	BIND12(environment_set_sdfgi, RID, bool, EnvironmentSDFGICascades, float, EnvironmentSDFGIYScale, bool, bool, bool, bool, float, float, float) +	BIND1(environment_set_sdfgi_ray_count, EnvironmentSDFGIRayCount) +	BIND1(environment_set_sdfgi_frames_to_converge, EnvironmentSDFGIFramesToConverge) +  	BIND3R(Ref<Image>, environment_bake_panorama, RID, bool, const Size2i &) -	BIND2(screen_space_roughness_limiter_set_active, bool, float) +	BIND3(screen_space_roughness_limiter_set_active, bool, float, float)  	BIND1(sub_surface_scattering_set_quality, SubSurfaceScatteringQuality)  	BIND2(sub_surface_scattering_set_scale, float, float) @@ -793,6 +799,8 @@ public:  	virtual bool is_low_end() const; 
+	virtual void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir); +  	RenderingServerRaster();  	~RenderingServerRaster(); diff --git a/servers/rendering/rendering_server_scene.cpp b/servers/rendering/rendering_server_scene.cpp index 7b8504036e..75a5834791 100644 --- a/servers/rendering/rendering_server_scene.cpp +++ b/servers/rendering/rendering_server_scene.cpp @@ -369,6 +369,11 @@ void RenderingServerScene::instance_set_base(RID p_instance, RID p_base) {  		switch (instance->base_type) {  			case RS::INSTANCE_LIGHT: {  				InstanceLightData *light = static_cast<InstanceLightData *>(instance->base_data); + +				if (RSG::storage->light_get_type(instance->base) != RS::LIGHT_DIRECTIONAL && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) { +					instance->scenario->dynamic_lights.erase(light->instance); +				} +  #ifdef DEBUG_ENABLED  				if (light->geometries.size()) {  					ERR_PRINT("BUG, indexing did not unpair geometries from light."); @@ -976,7 +981,26 @@ void RenderingServerScene::_update_instance(Instance *p_instance) {  		InstanceLightData *light = static_cast<InstanceLightData *>(p_instance->base_data);  		RSG::scene_render->light_instance_set_transform(light->instance, p_instance->transform); +		RSG::scene_render->light_instance_set_aabb(light->instance, p_instance->transform.xform(p_instance->aabb));  		light->shadow_dirty = true; + +		RS::LightBakeMode bake_mode = RSG::storage->light_get_bake_mode(p_instance->base); +		if (RSG::storage->light_get_type(p_instance->base) != RS::LIGHT_DIRECTIONAL && bake_mode != light->bake_mode) { +			if (light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) { +				p_instance->scenario->dynamic_lights.erase(light->instance); +			} + +			light->bake_mode = bake_mode; + +			if (light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) { +				p_instance->scenario->dynamic_lights.push_back(light->instance); +			} +		} + +		uint32_t max_sdfgi_cascade = RSG::storage->light_get_max_sdfgi_cascade(p_instance->base); +		if 
(light->max_sdfgi_cascade != max_sdfgi_cascade) { +			light->max_sdfgi_cascade = max_sdfgi_cascade; //should most likely make sdfgi dirty in scenario +		}  	}  	if (p_instance->base_type == RS::INSTANCE_REFLECTION_PROBE) { @@ -1788,8 +1812,10 @@ void RenderingServerScene::render_camera(RID p_render_buffers, RID p_camera, RID  		} break;  	} -	_prepare_scene(camera->transform, camera_matrix, ortho, camera->vaspect, camera->env, camera->effects, camera->visible_layers, p_scenario, p_shadow_atlas, RID()); -	_render_scene(p_render_buffers, camera->transform, camera_matrix, ortho, camera->env, camera->effects, p_scenario, p_shadow_atlas, RID(), -1); +	RID environment = _render_get_environment(p_camera, p_scenario); + +	_prepare_scene(camera->transform, camera_matrix, ortho, camera->vaspect, p_render_buffers, environment, camera->visible_layers, p_scenario, p_shadow_atlas, RID()); +	_render_scene(p_render_buffers, camera->transform, camera_matrix, ortho, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1);  #endif  } @@ -1808,6 +1834,8 @@ void RenderingServerScene::render_camera(RID p_render_buffers, Ref<XRInterface>  	Transform world_origin = XRServer::get_singleton()->get_world_origin();  	Transform cam_transform = p_interface->get_transform_for_eye(p_eye, world_origin); +	RID environment = _render_get_environment(p_camera, p_scenario); +  	// For stereo render we only prepare for our left eye and then reuse the outcome for our right eye  	if (p_eye == XRInterface::EYE_LEFT) {  		// Center our transform, we assume basis is equal. 
@@ -1865,17 +1893,17 @@ void RenderingServerScene::render_camera(RID p_render_buffers, Ref<XRInterface>  		mono_transform *= apply_z_shift;  		// now prepare our scene with our adjusted transform projection matrix -		_prepare_scene(mono_transform, combined_matrix, false, false, camera->env, camera->effects, camera->visible_layers, p_scenario, p_shadow_atlas, RID()); +		_prepare_scene(mono_transform, combined_matrix, false, false, p_render_buffers, environment, camera->visible_layers, p_scenario, p_shadow_atlas, RID());  	} else if (p_eye == XRInterface::EYE_MONO) {  		// For mono render, prepare as per usual -		_prepare_scene(cam_transform, camera_matrix, false, false, camera->env, camera->effects, camera->visible_layers, p_scenario, p_shadow_atlas, RID()); +		_prepare_scene(cam_transform, camera_matrix, false, false, p_render_buffers, environment, camera->visible_layers, p_scenario, p_shadow_atlas, RID());  	}  	// And render our scene... -	_render_scene(p_render_buffers, cam_transform, camera_matrix, false, camera->env, camera->effects, p_scenario, p_shadow_atlas, RID(), -1); +	_render_scene(p_render_buffers, cam_transform, camera_matrix, false, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1);  }; -void RenderingServerScene::_prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_force_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, bool p_using_shadows) { +void RenderingServerScene::_prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, bool p_using_shadows) {  	// Note, in stereo rendering:  	// - p_cam_transform will be a transform in the middle of our two eyes  	// - p_cam_projection 
is a wider frustrum that encompasses both eyes @@ -1887,6 +1915,10 @@ void RenderingServerScene::_prepare_scene(const Transform p_cam_transform, const  	RSG::scene_render->set_scene_pass(render_pass); +	if (p_render_buffers.is_valid()) { +		RSG::scene_render->sdfgi_update(p_render_buffers, p_environment, p_cam_transform.origin); //update conditions for SDFGI (whether its used or not) +	} +  	RENDER_TIMESTAMP("Frustum Culling");  	//rasterizer->set_camera(camera->transform, camera_matrix,ortho); @@ -2224,22 +2256,97 @@ void RenderingServerScene::_prepare_scene(const Transform p_cam_transform, const  			}  		}  	} + +	/* UPDATE SDFGI */ + +	if (p_render_buffers.is_valid()) { +		uint32_t cascade_index[8]; +		uint32_t cascade_sizes[8]; +		const RID *cascade_ptrs[8]; +		uint32_t cascade_count = 0; +		uint32_t sdfgi_light_cull_count = 0; + +		uint32_t prev_cascade = 0xFFFFFFFF; +		for (int i = 0; i < RSG::scene_render->sdfgi_get_pending_region_count(p_render_buffers); i++) { +			AABB region = RSG::scene_render->sdfgi_get_pending_region_bounds(p_render_buffers, i); +			uint32_t region_cascade = RSG::scene_render->sdfgi_get_pending_region_cascade(p_render_buffers, i); + +			if (region_cascade != prev_cascade) { +				cascade_sizes[cascade_count] = 0; +				cascade_index[cascade_count] = region_cascade; +				cascade_ptrs[cascade_count] = &sdfgi_light_cull_result[sdfgi_light_cull_count]; +				cascade_count++; +				sdfgi_light_cull_pass++; +				prev_cascade = region_cascade; +			} +			uint32_t sdfgi_cull_count = scenario->octree.cull_aabb(region, instance_shadow_cull_result, MAX_INSTANCE_CULL); + +			for (uint32_t j = 0; j < sdfgi_cull_count; j++) { +				Instance *ins = instance_shadow_cull_result[j]; + +				bool keep = false; + +				if (ins->base_type == RS::INSTANCE_LIGHT && ins->visible) { +					InstanceLightData *instance_light = (InstanceLightData *)ins->base_data; +					if (instance_light->bake_mode != RS::LIGHT_BAKE_STATIC || region_cascade > 
instance_light->max_sdfgi_cascade) { +						continue; +					} + +					if (sdfgi_light_cull_pass != instance_light->sdfgi_cascade_light_pass && sdfgi_light_cull_count < MAX_LIGHTS_CULLED) { +						instance_light->sdfgi_cascade_light_pass = sdfgi_light_cull_pass; +						sdfgi_light_cull_result[sdfgi_light_cull_count++] = instance_light->instance; +						cascade_sizes[cascade_count - 1]++; +					} +				} else if ((1 << ins->base_type) & RS::INSTANCE_GEOMETRY_MASK) { +					if (ins->baked_light) { +						keep = true; +					} +				} + +				if (!keep) { +					// remove, no reason to keep +					sdfgi_cull_count--; +					SWAP(instance_shadow_cull_result[j], instance_shadow_cull_result[sdfgi_cull_count]); +					j--; +				} +			} + +			RSG::scene_render->render_sdfgi(p_render_buffers, i, (RasterizerScene::InstanceBase **)instance_shadow_cull_result, sdfgi_cull_count); +			//have to save updated cascades, then update static lights. +		} + +		if (sdfgi_light_cull_count) { +			RSG::scene_render->render_sdfgi_static_lights(p_render_buffers, cascade_count, cascade_index, cascade_ptrs, cascade_sizes); +		} + +		RSG::scene_render->sdfgi_update_probes(p_render_buffers, p_environment, directional_light_ptr, directional_light_count, scenario->dynamic_lights.ptr(), scenario->dynamic_lights.size()); +	}  } -void RenderingServerScene::_render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_force_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass) { -	Scenario *scenario = scenario_owner.getornull(p_scenario); +RID RenderingServerScene::_render_get_environment(RID p_camera, RID p_scenario) { +	Camera *camera = camera_owner.getornull(p_camera); +	if (camera && RSG::scene_render->is_environment(camera->env)) { +		return camera->env; +	} -	/* ENVIRONMENT */ +	Scenario *scenario = scenario_owner.getornull(p_scenario); +	if (!scenario) { +		
return RID(); +	} +	if (RSG::scene_render->is_environment(scenario->environment)) { +		return scenario->environment; +	} -	RID environment; -	if (p_force_environment.is_valid()) { //camera has more environment priority -		environment = p_force_environment; -	} else if (scenario->environment.is_valid()) { -		environment = scenario->environment; -	} else { -		environment = scenario->fallback_environment; +	if (RSG::scene_render->is_environment(scenario->fallback_environment)) { +		return scenario->fallback_environment;  	} +	return RID(); +} + +void RenderingServerScene::_render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass) { +	Scenario *scenario = scenario_owner.getornull(p_scenario); +  	RID camera_effects;  	if (p_force_camera_effects.is_valid()) {  		camera_effects = p_force_camera_effects; @@ -2249,7 +2356,7 @@ void RenderingServerScene::_render_scene(RID p_render_buffers, const Transform p  	/* PROCESS GEOMETRY AND DRAW SCENE */  	RENDER_TIMESTAMP("Render Scene "); -	RSG::scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, (RasterizerScene::InstanceBase **)instance_cull_result, instance_cull_count, light_instance_cull_result, light_cull_count + directional_light_count, reflection_probe_instance_cull_result, reflection_probe_cull_count, gi_probe_instance_cull_result, gi_probe_cull_count, decal_instance_cull_result, decal_cull_count, (RasterizerScene::InstanceBase **)lightmap_cull_result, lightmap_cull_count, environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? 
RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass); +	RSG::scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, (RasterizerScene::InstanceBase **)instance_cull_result, instance_cull_count, light_instance_cull_result, light_cull_count + directional_light_count, reflection_probe_instance_cull_result, reflection_probe_cull_count, gi_probe_instance_cull_result, gi_probe_cull_count, decal_instance_cull_result, decal_cull_count, (RasterizerScene::InstanceBase **)lightmap_cull_result, lightmap_cull_count, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass);  }  void RenderingServerScene::render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas) { diff --git a/servers/rendering/rendering_server_scene.h b/servers/rendering/rendering_server_scene.h index f2e2918f21..165c3784c1 100644 --- a/servers/rendering/rendering_server_scene.h +++ b/servers/rendering/rendering_server_scene.h @@ -33,6 +33,8 @@  #include "servers/rendering/rasterizer.h" +#include "core/local_vector.h" +#include "core/math/geometry_3d.h"  #include "core/math/octree.h"  #include "core/os/semaphore.h"  #include "core/os/thread.h" @@ -121,6 +123,8 @@ public:  		SelfList<Instance>::List instances; +		LocalVector<RID> dynamic_lights; +  		Scenario() { debug = RS::SCENARIO_DEBUG_DISABLED; }  	}; @@ -309,7 +313,13 @@ public:  		Instance *baked_light; +		RS::LightBakeMode bake_mode; +		uint32_t max_sdfgi_cascade = 2; + +		uint64_t sdfgi_cascade_light_pass = 0; +  		InstanceLightData() { +			bake_mode = RS::LIGHT_BAKE_DISABLED;  			shadow_dirty = true;  			D = nullptr;  			last_version = 0; @@ -379,7 +389,9 @@ public:  	Instance *instance_cull_result[MAX_INSTANCE_CULL];  	Instance *instance_shadow_cull_result[MAX_INSTANCE_CULL]; //used for generating shadowmaps  	Instance *light_cull_result[MAX_LIGHTS_CULLED]; +	RID 
sdfgi_light_cull_result[MAX_LIGHTS_CULLED];  	RID light_instance_cull_result[MAX_LIGHTS_CULLED]; +	uint64_t sdfgi_light_cull_pass = 0;  	int light_cull_count;  	int directional_light_count;  	RID reflection_probe_instance_cull_result[MAX_REFLECTION_PROBES_CULLED]; @@ -438,9 +450,11 @@ public:  	_FORCE_INLINE_ bool _light_instance_update_shadow(Instance *p_instance, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_shadow_atlas, Scenario *p_scenario); +	RID _render_get_environment(RID p_camera, RID p_scenario); +  	bool _render_reflection_probe_step(Instance *p_instance, int p_step); -	void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_force_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, bool p_using_shadows = true); -	void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_force_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass); +	void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, bool p_using_shadows = true); +	void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass);  	void render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas);  	void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, Size2 p_viewport_size, RID 
p_shadow_atlas); diff --git a/servers/rendering/rendering_server_wrap_mt.h b/servers/rendering/rendering_server_wrap_mt.h index a746aa52b2..5c03fbc0eb 100644 --- a/servers/rendering/rendering_server_wrap_mt.h +++ b/servers/rendering/rendering_server_wrap_mt.h @@ -237,7 +237,8 @@ public:  	FUNC2(light_set_negative, RID, bool)  	FUNC2(light_set_cull_mask, RID, uint32_t)  	FUNC2(light_set_reverse_cull_face_mode, RID, bool) -	FUNC2(light_set_use_gi, RID, bool) +	FUNC2(light_set_bake_mode, RID, LightBakeMode) +	FUNC2(light_set_max_sdfgi_cascade, RID, uint32_t)  	FUNC2(light_omni_set_shadow_mode, RID, LightOmniShadowMode) @@ -251,9 +252,9 @@ public:  	FUNC2(reflection_probe_set_update_mode, RID, ReflectionProbeUpdateMode)  	FUNC2(reflection_probe_set_intensity, RID, float) -	FUNC2(reflection_probe_set_interior_ambient, RID, const Color &) -	FUNC2(reflection_probe_set_interior_ambient_energy, RID, float) -	FUNC2(reflection_probe_set_interior_ambient_probe_contribution, RID, float) +	FUNC2(reflection_probe_set_ambient_color, RID, const Color &) +	FUNC2(reflection_probe_set_ambient_energy, RID, float) +	FUNC2(reflection_probe_set_ambient_mode, RID, ReflectionProbeAmbientMode)  	FUNC2(reflection_probe_set_max_distance, RID, float)  	FUNC2(reflection_probe_set_extents, RID, const Vector3 &)  	FUNC2(reflection_probe_set_origin_offset, RID, const Vector3 &) @@ -321,6 +322,8 @@ public:  	FUNC2(gi_probe_set_anisotropy_strength, RID, float)  	FUNC1RC(float, gi_probe_get_anisotropy_strength, RID) +	FUNC1(gi_probe_set_quality, GIProbeQuality) +  	/* LIGHTMAP CAPTURE */  	FUNCRID(lightmap) @@ -465,6 +468,10 @@ public:  	FUNC2(environment_set_ssao_quality, EnvironmentSSAOQuality, bool) +	FUNC12(environment_set_sdfgi, RID, bool, EnvironmentSDFGICascades, float, EnvironmentSDFGIYScale, bool, bool, bool, bool, float, float, float) +	FUNC1(environment_set_sdfgi_ray_count, EnvironmentSDFGIRayCount) +	FUNC1(environment_set_sdfgi_frames_to_converge, EnvironmentSDFGIFramesToConverge) +  	
FUNC11(environment_set_glow, RID, bool, int, float, float, float, float, EnvironmentGlowBlendMode, float, float, float)  	FUNC1(environment_glow_set_use_bicubic_upscale, bool) @@ -478,7 +485,7 @@ public:  	FUNC3R(Ref<Image>, environment_bake_panorama, RID, bool, const Size2i &) -	FUNC2(screen_space_roughness_limiter_set_active, bool, float) +	FUNC3(screen_space_roughness_limiter_set_active, bool, float, float)  	FUNC1(sub_surface_scattering_set_quality, SubSurfaceScatteringQuality)  	FUNC2(sub_surface_scattering_set_scale, float, float) @@ -714,6 +721,10 @@ public:  		return rendering_server->get_frame_profile();  	} +	virtual void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir) { +		rendering_server->sdfgi_set_debug_probe_select(p_position, p_dir); +	} +  	RenderingServerWrapMT(RenderingServer *p_contained, bool p_create_thread);  	~RenderingServerWrapMT(); diff --git a/servers/rendering/shader_language.cpp b/servers/rendering/shader_language.cpp index 14a28554f9..809b03520b 100644 --- a/servers/rendering/shader_language.cpp +++ b/servers/rendering/shader_language.cpp @@ -5585,6 +5585,12 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const Map<StringName, Bui  		} else if (tk.type == TK_CF_RETURN) {  			//check return type  			BlockNode *b = p_block; + +			if (b && b->parent_function && (b->parent_function->name == "vertex" || b->parent_function->name == "fragment" || b->parent_function->name == "light")) { +				_set_error(vformat("Using 'return' in '%s' processor function results in undefined behavior!", b->parent_function->name)); +				return ERR_PARSE_ERROR; +			} +  			while (b && !b->parent_function) {  				b = b->parent_block;  			} diff --git a/servers/rendering/shader_types.cpp b/servers/rendering/shader_types.cpp index 2601efa9e2..06cb6171a5 100644 --- a/servers/rendering/shader_types.cpp +++ b/servers/rendering/shader_types.cpp @@ -114,6 +114,7 @@ ShaderTypes::ShaderTypes() {  	
shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["AO_LIGHT_AFFECT"] = ShaderLanguage::TYPE_FLOAT;  	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["EMISSION"] = ShaderLanguage::TYPE_VEC3;  	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["SCREEN_TEXTURE"] = ShaderLanguage::TYPE_SAMPLER2D; +	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["NORMAL_ROUGHNESS_TEXTURE"] = ShaderLanguage::TYPE_SAMPLER2D;  	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["DEPTH_TEXTURE"] = ShaderLanguage::TYPE_SAMPLER2D;  	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["DEPTH"] = ShaderLanguage::TYPE_FLOAT;  	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["SCREEN_UV"] = ShaderLanguage::TYPE_VEC2; diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index 7edab1418d..d4d5080109 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -1608,7 +1608,7 @@ void RenderingServer::_bind_methods() {  	ClassDB::bind_method(D_METHOD("light_set_negative", "light", "enable"), &RenderingServer::light_set_negative);  	ClassDB::bind_method(D_METHOD("light_set_cull_mask", "light", "mask"), &RenderingServer::light_set_cull_mask);  	ClassDB::bind_method(D_METHOD("light_set_reverse_cull_face_mode", "light", "enabled"), &RenderingServer::light_set_reverse_cull_face_mode); -	ClassDB::bind_method(D_METHOD("light_set_use_gi", "light", "enabled"), &RenderingServer::light_set_use_gi); +	ClassDB::bind_method(D_METHOD("light_set_bake_mode", "light", "bake_mode"), &RenderingServer::light_set_bake_mode);  	ClassDB::bind_method(D_METHOD("light_omni_set_shadow_mode", "light", "mode"), &RenderingServer::light_omni_set_shadow_mode); @@ -1619,9 +1619,9 @@ void RenderingServer::_bind_methods() {  	ClassDB::bind_method(D_METHOD("reflection_probe_create"), &RenderingServer::reflection_probe_create);  	
ClassDB::bind_method(D_METHOD("reflection_probe_set_update_mode", "probe", "mode"), &RenderingServer::reflection_probe_set_update_mode);  	ClassDB::bind_method(D_METHOD("reflection_probe_set_intensity", "probe", "intensity"), &RenderingServer::reflection_probe_set_intensity); -	ClassDB::bind_method(D_METHOD("reflection_probe_set_interior_ambient", "probe", "color"), &RenderingServer::reflection_probe_set_interior_ambient); -	ClassDB::bind_method(D_METHOD("reflection_probe_set_interior_ambient_energy", "probe", "energy"), &RenderingServer::reflection_probe_set_interior_ambient_energy); -	ClassDB::bind_method(D_METHOD("reflection_probe_set_interior_ambient_probe_contribution", "probe", "contrib"), &RenderingServer::reflection_probe_set_interior_ambient_probe_contribution); +	ClassDB::bind_method(D_METHOD("reflection_probe_set_ambient_mode", "probe", "mode"), &RenderingServer::reflection_probe_set_ambient_mode); +	ClassDB::bind_method(D_METHOD("reflection_probe_set_ambient_color", "probe", "color"), &RenderingServer::reflection_probe_set_ambient_color); +	ClassDB::bind_method(D_METHOD("reflection_probe_set_ambient_energy", "probe", "energy"), &RenderingServer::reflection_probe_set_ambient_energy);  	ClassDB::bind_method(D_METHOD("reflection_probe_set_max_distance", "probe", "distance"), &RenderingServer::reflection_probe_set_max_distance);  	ClassDB::bind_method(D_METHOD("reflection_probe_set_extents", "probe", "extents"), &RenderingServer::reflection_probe_set_extents);  	ClassDB::bind_method(D_METHOD("reflection_probe_set_origin_offset", "probe", "offset"), &RenderingServer::reflection_probe_set_origin_offset); @@ -2060,9 +2060,11 @@ void RenderingServer::_bind_methods() {  	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_DIRECTIONAL_SHADOW_ATLAS);  	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_SCENE_LUMINANCE);  	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_SSAO); -	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_ROUGHNESS_LIMITER);  	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_PSSM_SPLITS);  	
BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_DECAL_ATLAS); +	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_SDFGI); +	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_SDFGI_PROBES); +	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_GI_BUFFER);  	BIND_ENUM_CONSTANT(SKY_MODE_QUALITY);  	BIND_ENUM_CONSTANT(SKY_MODE_REALTIME); @@ -2346,7 +2348,7 @@ RenderingServer::RenderingServer() {  	GLOBAL_DEF("rendering/quality/gi_probes/anisotropic", false);  	GLOBAL_DEF("rendering/quality/gi_probes/quality", 1); -	ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/gi_probes/quality", PropertyInfo(Variant::INT, "rendering/quality/gi_probes/quality", PROPERTY_HINT_ENUM, "Lowest (1 Cone - Fast),Medium (4 Cones - Average),High (6 Cones - Slow)")); +	ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/gi_probes/quality", PropertyInfo(Variant::INT, "rendering/quality/gi_probes/quality", PROPERTY_HINT_ENUM, "Low (4 Cones - Fast),High (6 Cones - Slow)"));  	GLOBAL_DEF("rendering/quality/shading/force_vertex_shading", false);  	GLOBAL_DEF("rendering/quality/shading/force_vertex_shading.mobile", true); @@ -2372,10 +2374,11 @@ RenderingServer::RenderingServer() {  	ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/ssao/quality", PropertyInfo(Variant::INT, "rendering/quality/ssao/quality", PROPERTY_HINT_ENUM, "Low (Fast),Medium (Average),High (Slow),Ultra (Slower)"));  	GLOBAL_DEF("rendering/quality/ssao/half_size", false); -	GLOBAL_DEF("rendering/quality/screen_filters/screen_space_roughness_limiter", 0); -	ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/screen_filters/screen_space_roughness_limiter", PropertyInfo(Variant::INT, "rendering/quality/screen_filters/screen_space_roughness_limiter", PROPERTY_HINT_ENUM, "Disabled (Fast),Enabled (Average)")); -	GLOBAL_DEF("rendering/quality/screen_filters/screen_space_roughness_limiter_curve", 1.0); -	
ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/screen_filters/screen_space_roughness_limiter_curve", PropertyInfo(Variant::FLOAT, "rendering/quality/screen_filters/screen_space_roughness_limiter_curve", PROPERTY_HINT_EXP_EASING, "0.01,8,0.01")); +	GLOBAL_DEF("rendering/quality/screen_filters/screen_space_roughness_limiter_enable", true); +	GLOBAL_DEF("rendering/quality/screen_filters/screen_space_roughness_limiter_amount", 0.25); +	GLOBAL_DEF("rendering/quality/screen_filters/screen_space_roughness_limiter_limit", 0.18); +	ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/screen_filters/screen_space_roughness_limiter_amount", PropertyInfo(Variant::FLOAT, "rendering/quality/screen_filters/screen_space_roughness_limiter_amount", PROPERTY_HINT_RANGE, "0.01,4.0,0.01")); +	ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/screen_filters/screen_space_roughness_limiter_limit", PropertyInfo(Variant::FLOAT, "rendering/quality/screen_filters/screen_space_roughness_limiter_limit", PROPERTY_HINT_RANGE, "0.01,1.0,0.01"));  	GLOBAL_DEF("rendering/quality/glow/upscale_mode", 1);  	ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/glow/upscale_mode", PropertyInfo(Variant::INT, "rendering/quality/glow/upscale_mode", PROPERTY_HINT_ENUM, "Linear (Fast),Bicubic (Slow)")); @@ -2395,6 +2398,11 @@ RenderingServer::RenderingServer() {  	GLOBAL_DEF("rendering/lightmapper/probe_capture_update_speed", 15);  	ProjectSettings::get_singleton()->set_custom_property_info("rendering/lightmapper/probe_capture_update_speed", PropertyInfo(Variant::FLOAT, "rendering/lightmapper/probe_capture_update_speed", PROPERTY_HINT_RANGE, "0.001,256,0.001")); + +	GLOBAL_DEF("rendering/sdfgi/probe_ray_count", 2); +	ProjectSettings::get_singleton()->set_custom_property_info("rendering/sdfgi/probe_ray_count", PropertyInfo(Variant::INT, "rendering/sdfgi/probe_ray_count", PROPERTY_HINT_ENUM, "8 
(Fastest),16,32,64,96,128 (Slowest)")); +	GLOBAL_DEF("rendering/sdfgi/frames_to_converge", 1); +	ProjectSettings::get_singleton()->set_custom_property_info("rendering/sdfgi/frames_to_converge", PropertyInfo(Variant::INT, "rendering/sdfgi/frames_to_converge", PROPERTY_HINT_ENUM, "5 (Less Latency but Lower Quality),10,15,20,25,30 (More Latency but Higher Quality)"));  }  RenderingServer::~RenderingServer() { diff --git a/servers/rendering_server.h b/servers/rendering_server.h index 56a8325630..9fdaa8a93e 100644 --- a/servers/rendering_server.h +++ b/servers/rendering_server.h @@ -406,7 +406,15 @@ public:  	virtual void light_set_negative(RID p_light, bool p_enable) = 0;  	virtual void light_set_cull_mask(RID p_light, uint32_t p_mask) = 0;  	virtual void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) = 0; -	virtual void light_set_use_gi(RID p_light, bool p_enable) = 0; + +	enum LightBakeMode { +		LIGHT_BAKE_DISABLED, +		LIGHT_BAKE_DYNAMIC, +		LIGHT_BAKE_STATIC, +	}; + +	virtual void light_set_bake_mode(RID p_light, LightBakeMode p_bake_mode) = 0; +	virtual void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) = 0;  	// omni light  	enum LightOmniShadowMode { @@ -445,9 +453,16 @@ public:  	virtual void reflection_probe_set_update_mode(RID p_probe, ReflectionProbeUpdateMode p_mode) = 0;  	virtual void reflection_probe_set_intensity(RID p_probe, float p_intensity) = 0; -	virtual void reflection_probe_set_interior_ambient(RID p_probe, const Color &p_color) = 0; -	virtual void reflection_probe_set_interior_ambient_energy(RID p_probe, float p_energy) = 0; -	virtual void reflection_probe_set_interior_ambient_probe_contribution(RID p_probe, float p_contrib) = 0; + +	enum ReflectionProbeAmbientMode { +		REFLECTION_PROBE_AMBIENT_DISABLED, +		REFLECTION_PROBE_AMBIENT_ENVIRONMENT, +		REFLECTION_PROBE_AMBIENT_COLOR +	}; + +	virtual void reflection_probe_set_ambient_mode(RID p_probe, ReflectionProbeAmbientMode p_mode) = 0; +	virtual void 
reflection_probe_set_ambient_color(RID p_probe, const Color &p_color) = 0; +	virtual void reflection_probe_set_ambient_energy(RID p_probe, float p_energy) = 0;  	virtual void reflection_probe_set_max_distance(RID p_probe, float p_distance) = 0;  	virtual void reflection_probe_set_extents(RID p_probe, const Vector3 &p_extents) = 0;  	virtual void reflection_probe_set_origin_offset(RID p_probe, const Vector3 &p_offset) = 0; @@ -522,6 +537,13 @@ public:  	virtual void gi_probe_set_anisotropy_strength(RID p_gi_probe, float p_strength) = 0;  	virtual float gi_probe_get_anisotropy_strength(RID p_gi_probe) const = 0; +	enum GIProbeQuality { +		GI_PROBE_QUALITY_LOW, +		GI_PROBE_QUALITY_HIGH, +	}; + +	virtual void gi_probe_set_quality(GIProbeQuality) = 0; +  	/* LIGHTMAP */  	virtual RID lightmap_create() = 0; @@ -690,9 +712,12 @@ public:  		VIEWPORT_DEBUG_DRAW_DIRECTIONAL_SHADOW_ATLAS,  		VIEWPORT_DEBUG_DRAW_SCENE_LUMINANCE,  		VIEWPORT_DEBUG_DRAW_SSAO, -		VIEWPORT_DEBUG_DRAW_ROUGHNESS_LIMITER,  		VIEWPORT_DEBUG_DRAW_PSSM_SPLITS,  		VIEWPORT_DEBUG_DRAW_DECAL_ATLAS, +		VIEWPORT_DEBUG_DRAW_SDFGI, +		VIEWPORT_DEBUG_DRAW_SDFGI_PROBES, +		VIEWPORT_DEBUG_DRAW_GI_BUFFER, +  	};  	virtual void viewport_set_debug_draw(RID p_viewport, ViewportDebugDraw p_draw) = 0; @@ -807,13 +832,51 @@ public:  	virtual void environment_set_ssao_quality(EnvironmentSSAOQuality p_quality, bool p_half_size) = 0; +	enum EnvironmentSDFGICascades { +		ENV_SDFGI_CASCADES_4, +		ENV_SDFGI_CASCADES_6, +		ENV_SDFGI_CASCADES_8, +	}; + +	enum EnvironmentSDFGIYScale { +		ENV_SDFGI_Y_SCALE_DISABLED, +		ENV_SDFGI_Y_SCALE_75_PERCENT, +		ENV_SDFGI_Y_SCALE_50_PERCENT +	}; + +	virtual void environment_set_sdfgi(RID p_env, bool p_enable, EnvironmentSDFGICascades p_cascades, float p_min_cell_size, EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, bool p_enhance_ssr, float p_energy, float p_normal_bias, float p_probe_bias) = 0; + +	enum EnvironmentSDFGIRayCount { +		
ENV_SDFGI_RAY_COUNT_8, +		ENV_SDFGI_RAY_COUNT_16, +		ENV_SDFGI_RAY_COUNT_32, +		ENV_SDFGI_RAY_COUNT_64, +		ENV_SDFGI_RAY_COUNT_96, +		ENV_SDFGI_RAY_COUNT_128, +		ENV_SDFGI_RAY_COUNT_MAX, +	}; + +	virtual void environment_set_sdfgi_ray_count(EnvironmentSDFGIRayCount p_ray_count) = 0; + +	enum EnvironmentSDFGIFramesToConverge { +		ENV_SDFGI_CONVERGE_IN_5_FRAMES, +		ENV_SDFGI_CONVERGE_IN_10_FRAMES, +		ENV_SDFGI_CONVERGE_IN_15_FRAMES, +		ENV_SDFGI_CONVERGE_IN_20_FRAMES, +		ENV_SDFGI_CONVERGE_IN_25_FRAMES, +		ENV_SDFGI_CONVERGE_IN_30_FRAMES, +		ENV_SDFGI_CONVERGE_MAX +	}; + +	virtual void environment_set_sdfgi_frames_to_converge(EnvironmentSDFGIFramesToConverge p_frames) = 0; +  	virtual void environment_set_fog(RID p_env, bool p_enable, const Color &p_color, const Color &p_sun_color, float p_sun_amount) = 0;  	virtual void environment_set_fog_depth(RID p_env, bool p_enable, float p_depth_begin, float p_depth_end, float p_depth_curve, bool p_transmit, float p_transmit_curve) = 0;  	virtual void environment_set_fog_height(RID p_env, bool p_enable, float p_min_height, float p_max_height, float p_height_curve) = 0;  	virtual Ref<Image> environment_bake_panorama(RID p_env, bool p_bake_irradiance, const Size2i &p_size) = 0; -	virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_curve) = 0; +	virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_limit) = 0;  	enum SubSurfaceScatteringQuality {  		SUB_SURFACE_SCATTERING_QUALITY_DISABLED, @@ -1207,6 +1270,8 @@ public:  	virtual RID get_test_texture();  	virtual RID get_white_texture(); +	virtual void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir) = 0; +  	virtual RID make_sphere_mesh(int p_lats, int p_lons, float p_radius);  	virtual void mesh_add_surface_from_mesh_data(RID p_mesh, const Geometry3D::MeshData &p_mesh_data); @@ -1248,10 +1313,12 @@ VARIANT_ENUM_CAST(RenderingServer::BlendShapeMode);  
VARIANT_ENUM_CAST(RenderingServer::MultimeshTransformFormat);  VARIANT_ENUM_CAST(RenderingServer::LightType);  VARIANT_ENUM_CAST(RenderingServer::LightParam); +VARIANT_ENUM_CAST(RenderingServer::LightBakeMode);  VARIANT_ENUM_CAST(RenderingServer::LightOmniShadowMode);  VARIANT_ENUM_CAST(RenderingServer::LightDirectionalShadowMode);  VARIANT_ENUM_CAST(RenderingServer::LightDirectionalShadowDepthRangeMode);  VARIANT_ENUM_CAST(RenderingServer::ReflectionProbeUpdateMode); +VARIANT_ENUM_CAST(RenderingServer::ReflectionProbeAmbientMode);  VARIANT_ENUM_CAST(RenderingServer::DecalTexture);  VARIANT_ENUM_CAST(RenderingServer::ParticlesDrawOrder);  VARIANT_ENUM_CAST(RenderingServer::ViewportUpdateMode);  |