5 files changed, 590 insertions, 0 deletions
diff --git a/servers/rendering/rasterizer_rd/shaders/SCsub b/servers/rendering/rasterizer_rd/shaders/SCsub
index 6e852e2dc5..ade0418bd2 100644
--- a/servers/rendering/rasterizer_rd/shaders/SCsub
+++ b/servers/rendering/rasterizer_rd/shaders/SCsub
@@ -22,3 +22,7 @@ if "RD_GLSL" in env["BUILDERS"]:
     env.RD_GLSL("ssao_minify.glsl")
     env.RD_GLSL("ssao_blur.glsl")
     env.RD_GLSL("roughness_limiter.glsl")
+    env.RD_GLSL("screen_space_reflection.glsl")
+    env.RD_GLSL("screen_space_reflection_filter.glsl")
+    env.RD_GLSL("screen_space_reflection_scale.glsl")
+    env.RD_GLSL("specular_merge.glsl")
diff --git a/servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl
new file mode 100644
index 0000000000..e3c26c9b72
--- /dev/null
+++ b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection.glsl
@@ -0,0 +1,262 @@
+/* clang-format off */
+[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+/* clang-format on */
+
+layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_diffuse; // color read at the ray hit position
+layout(r32f, set = 0, binding = 1) uniform restrict readonly image2D source_depth; // depth, used directly as view-space Z by main()
+layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D ssr_image; // output: hit color in rgb, blend weight in a
+#ifdef MODE_ROUGH
+layout(r8, set = 1, binding = 1) uniform restrict writeonly image2D blur_radius_image; // output: blur radius / 255.0
+#endif
+layout(rgba8, set = 2, binding = 0) uniform restrict readonly image2D source_normal; // normal stored as n * 0.5 + 0.5
+layout(set = 3, binding = 0) uniform sampler2D source_metallic; // fetched at ssC << 1; scales the output alpha through metallic_mask
+#ifdef MODE_ROUGH
+layout(set = 3, binding = 1) uniform sampler2D source_roughness; // fetched at ssC << 1; .r sets the blur cone angle
+#endif
+
+layout(push_constant, binding = 2, std430) uniform Params {
+	// Per-dispatch parameters for the SSR trace.
+	vec4 proj_info; // xy scale and zw offset applied to pixel coordinates in reconstructCSPosition()
+
+	ivec2 screen_size; // dispatch bounds in pixels; also the pixel <-> UV scale used by main()
+	float camera_z_near; // near plane distance; the ray is clipped against Z = -camera_z_near
+	float camera_z_far; // far plane distance; also the unclipped ray length
+
+	int num_steps; // maximum number of ray-march iterations
+	float depth_tolerance; // maximum Z gap between the ray and the depth it passed behind for the crossing to count as a hit
+	float distance_fade; // exponent of the fade-out applied as the march uses up its steps
+	float curve_fade_in; // exponent of the fade-in near the ray origin; 0.0 disables it
+
+	bool orthogonal; // selects the orthogonal branch of reconstructCSPosition()
+	float filter_mipmap_levels; // not referenced by this shader
+	bool use_half_res; // not referenced by this shader
+	uint metallic_mask; // packed unorm4x8 channel weights dotted with the source_metallic texel
+
+	mat4 projection; // view-to-clip matrix used by view_to_screen()
+}
+params;
+
+vec2 view_to_screen(vec3 view_pos, out float w) {
+	// Projects a view-space position, remaps NDC [-1, 1] to [0, 1], and hands back the clip-space w through the out parameter.
+	vec4 clip = params.projection * vec4(view_pos, 1.0);
+	w = clip.w;
+	vec2 ndc = clip.xy / clip.w;
+	return ndc * 0.5 + 0.5;
+}
+
+#define M_PI 3.14159265359
+
+vec3 reconstructCSPosition(vec2 S, float z) {
+	// Rebuilds the view-space position of pixel coordinate S at depth z from the precomputed proj_info terms.
+	vec2 plane_xy = S * params.proj_info.xy + params.proj_info.zw;
+	// Orthogonal projections leave XY independent of depth; otherwise XY scales with z.
+	float xy_scale = params.orthogonal ? 1.0 : z;
+	return vec3(plane_xy * xy_scale, z);
+}
+
+void main() {
+	// Traces one reflection ray per pixel through the depth buffer and writes the hit color plus a blend weight to ssr_image.
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+	// valid texels are [0, screen_size), so an invocation sitting exactly on screen_size must bail out as well
+	if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing
+		return;
+	}
+
+	vec2 pixel_size = 1.0 / vec2(params.screen_size);
+	vec2 uv = vec2(ssC) * pixel_size;
+
+	uv += pixel_size * 0.5;
+
+	float base_depth = imageLoad(source_depth, ssC).r;
+
+	// View space point being shaded
+	vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth);
+
+	vec3 normal = imageLoad(source_normal, ssC).xyz * 2.0 - 1.0;
+	normal = normalize(normal);
+	normal.y = -normal.y; //because this code reads flipped
+
+	vec3 view_dir = normalize(vertex);
+	vec3 ray_dir = normalize(reflect(view_dir, normal));
+
+	if (dot(ray_dir, normal) < 0.001) {
+		// the reflection does not leave the surface: clear every output so no stale data survives
+#ifdef MODE_ROUGH
+		imageStore(blur_radius_image, ssC, vec4(0.0));
+#endif
+		imageStore(ssr_image, ssC, vec4(0.0));
+		return;
+	}
+
+	// make ray length and clip it against the near plane (don't want to trace beyond visible)
+	float ray_len = (vertex.z + ray_dir.z * params.camera_z_far) > -params.camera_z_near ? (-params.camera_z_near - vertex.z) / ray_dir.z : params.camera_z_far;
+	vec3 ray_end = vertex + ray_dir * ray_len;
+
+	float w_begin;
+	vec2 vp_line_begin = view_to_screen(vertex, w_begin);
+	float w_end;
+	vec2 vp_line_end = view_to_screen(ray_end, w_end);
+	vec2 vp_line_dir = vp_line_end - vp_line_begin;
+
+	// we need to interpolate w along the ray, to generate perspective correct reflections
+	w_begin = 1.0 / w_begin;
+	w_end = 1.0 / w_end;
+
+	float z_begin = vertex.z * w_begin;
+	float z_end = ray_end.z * w_end;
+
+	vec2 line_begin = vp_line_begin / pixel_size;
+	vec2 line_dir = vp_line_dir / pixel_size;
+	float z_dir = z_end - z_begin;
+	float w_dir = w_end - w_begin;
+
+	// clip the line to the viewport edges
+
+	float scale_max_x = min(1.0, 0.99 * (1.0 - vp_line_begin.x) / max(1e-5, vp_line_dir.x));
+	float scale_max_y = min(1.0, 0.99 * (1.0 - vp_line_begin.y) / max(1e-5, vp_line_dir.y));
+	float scale_min_x = min(1.0, 0.99 * vp_line_begin.x / max(1e-5, -vp_line_dir.x));
+	float scale_min_y = min(1.0, 0.99 * vp_line_begin.y / max(1e-5, -vp_line_dir.y));
+	float line_clip = min(scale_max_x, scale_max_y) * min(scale_min_x, scale_min_y);
+	line_dir *= line_clip;
+	z_dir *= line_clip;
+	w_dir *= line_clip;
+
+	// clip z and w advance to line advance
+	vec2 line_advance = normalize(line_dir); // down to pixel
+	float step_size = 1.0 / length(line_dir); // line_advance has unit length, so one step covers 1 / |line_dir| of the line
+	float z_advance = z_dir * step_size; // adapt z advance to line advance
+	float w_advance = w_dir * step_size; // adapt w advance to line advance
+
+	// make line advance faster if direction is closer to pixel edges (this avoids sampling the same pixel twice)
+	float advance_angle_adj = 1.0 / max(abs(line_advance.x), abs(line_advance.y));
+	line_advance *= advance_angle_adj; // the larger component now moves exactly one pixel per step
+	z_advance *= advance_angle_adj;
+	w_advance *= advance_angle_adj;
+
+	// z and w carry view Z / clip w and 1 / clip w, which interpolate linearly on screen; z / w recovers view-space Z
+	vec2 pos = line_begin;
+	float z = z_begin;
+	float w = w_begin;
+	float z_from = z / w;
+	float z_to = z_from;
+	float depth;
+
+	bool found = false;
+
+	float steps_taken = 0.0;
+
+	for (int i = 0; i < params.num_steps; i++) {
+
+		pos += line_advance;
+		z += z_advance;
+		w += w_advance;
+
+		// fetch the scene depth under the current sample (already usable as view-space Z)
+
+		depth = imageLoad(source_depth, ivec2(pos - 0.5)).r;
+
+		if (-depth >= params.camera_z_far) { //went beyond camera
+			break;
+		}
+
+		z_from = z_to;
+		z_to = z / w;
+
+		if (depth > z_to) {
+			// if depth was surpassed
+			if (depth <= max(z_to, z_from) + params.depth_tolerance) {
+				// check the depth tolerance
+				// (the normal at the hit is not validated)
+				found = true;
+			}
+			break;
+		}
+
+		// no crossing yet; the step count feeds the distance fade below
+		steps_taken += 1.0;
+	}
+
+	if (found) {
+		float margin_blend = 1.0;
+		vec2 margin = vec2((params.screen_size.x + params.screen_size.y) * 0.5 * 0.05); // make a uniform margin
+		if (any(bvec4(lessThan(pos, -margin), greaterThan(pos, params.screen_size + margin)))) {
+			// clip outside screen + margin
+#ifdef MODE_ROUGH
+			imageStore(blur_radius_image, ssC, vec4(0.0));
+#endif
+			imageStore(ssr_image, ssC, vec4(0.0));
+			return;
+		}
+
+		{
+			//blend fading out towards external margin
+			vec2 margin_grad = mix(pos - params.screen_size, -pos, lessThan(pos, vec2(0.0)));
+			margin_blend = 1.0 - smoothstep(0.0, margin.x, max(margin_grad.x, margin_grad.y));
+		}
+
+		// fraction of the step budget consumed before the hit; drives both fades below
+		float grad = steps_taken / float(params.num_steps);
+		// curve_fade_in == 0.0 switches the fade-in off and sidesteps pow(0.0, 0.0), which GLSL leaves undefined
+		float initial_fade = params.curve_fade_in == 0.0 ? 1.0 : pow(clamp(grad, 0.0, 1.0), params.curve_fade_in);
+		// hits found late in the march fade out according to distance_fade
+		float fade = pow(clamp(1.0 - grad, 0.0, 1.0), params.distance_fade) * initial_fade;
+		vec2 final_pos = pos;
+
+#ifdef MODE_ROUGH
+
+		// if roughness is enabled, do screen space cone tracing
+		float blur_radius = 0.0;
+		// NOTE(review): ssC << 1 presumes the roughness buffer has twice this pass's resolution; params.use_half_res is never consulted - confirm
+		float roughness = texelFetch(source_roughness, ssC << 1, 0).r;
+
+		if (roughness > 0.001) {
+
+			float cone_angle = min(roughness, 0.999) * M_PI * 0.5;
+			float cone_len = length(final_pos - line_begin);
+			float op_len = 2.0 * tan(cone_angle) * cone_len; // opposite side of iso triangle
+			{
+				// fit to sphere inside cone (sphere ends at end of cone), something like this:
+				// ___
+				// \O/
+				//  V
+				//
+				// as it avoids bleeding from beyond the reflection as much as possible. As a plus
+				// it also makes the rough reflection more elongated.
+				float a = op_len;
+				float h = cone_len;
+				float a2 = a * a;
+				float fh2 = 4.0f * h * h;
+				blur_radius = (a * (sqrt(a2 + fh2) - a)) / (4.0f * h);
+			}
+		}
+
+		// blur_radius is measured in pixels; / 255.0 fits it into the normalized r8 target
+		imageStore(blur_radius_image, ssC, vec4(blur_radius / 255.0)); //stored in r8
+
+#endif
+
+		// color under the hit pixel; alpha carries the distance fade and the screen-edge fade
+		vec4 final_color = vec4(imageLoad(source_diffuse, ivec2(final_pos - 0.5)).rgb, fade * margin_blend);
+		//change blend by metallic
+		vec4 metallic_mask = unpackUnorm4x8(params.metallic_mask);
+		final_color.a *= dot(metallic_mask, texelFetch(source_metallic, ssC << 1, 0));
+
+		imageStore(ssr_image, ssC, final_color);
+
+	} else {
+#ifdef MODE_ROUGH
+		imageStore(blur_radius_image, ssC, vec4(0.0));
+#endif
+		imageStore(ssr_image, ssC, vec4(0.0));
+	}
+}
diff --git a/servers/rendering/rasterizer_rd/shaders/screen_space_reflection_filter.glsl b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection_filter.glsl
new file mode 100644
index 0000000000..671e289ed0
--- /dev/null
+++ b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection_filter.glsl
@@ -0,0 +1,169 @@
+/* clang-format off */
+[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+/* clang-format on */
+
+layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_ssr; // SSR color and alpha to be blurred
+layout(r8, set = 0, binding = 1) uniform restrict readonly image2D source_radius; // per-pixel blur radius, stored as radius / 255.0
+layout(rgba8, set = 1, binding = 0) uniform restrict readonly image2D source_normal; // normal stored as n * 0.5 + 0.5; used for the edge tests
+
+layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; // blurred output
+#ifndef VERTICAL_PASS
+layout(r8, set = 2, binding = 1) uniform restrict writeonly image2D dest_radius; // blurred radius; only the non-vertical pass writes it
+#endif
+layout(r32f, set = 3, binding = 0) uniform restrict readonly image2D source_depth; // depth, used as view-space Z to rebuild positions
+
+layout(push_constant, binding = 2, std430) uniform Params {
+	// Per-dispatch parameters for one blur pass.
+	vec4 proj_info; // xy scale and zw offset applied to pixel coordinates in reconstructCSPosition()
+
+	bool orthogonal; // selects the orthogonal branch of reconstructCSPosition()
+	float edge_tolerance; // threshold of the normal and position tests that stop the blur in do_filter()
+	int increment; // distance in pixels between consecutive taps
+	uint pad; // not referenced by this shader
+
+	ivec2 screen_size; // dispatch bounds in pixels
+	bool vertical; // not referenced by this shader; the direction comes from the VERTICAL_PASS define
+	uint steps; // loop bound in do_filter(); taps 1 .. steps - 1 are visited on each side
+}
+params;
+
+#define GAUSS_TABLE_SIZE 15
+// Falloff weights looked up by gauss_weight(); entry 0 is also the weight main() gives the center tap.
+const float gauss_table[GAUSS_TABLE_SIZE + 1] = float[](
+		0.1847392078702266,
+		0.16595854345772326,
+		0.12031364177766891,
+		0.07038755277896766,
+		0.03322925565155569,
+		0.012657819729901945,
+		0.0038903040680094217,
+		0.0009646503390864025,
+		0.00019297087402915717,
+		0.000031139936308099136,
+		0.000004053309048174758,
+		4.255228059965837e-7,
+		3.602517634249573e-8,
+		2.4592560765896795e-9,
+		1.3534945386863618e-10,
+		0.0 //one more for interpolation
+);
+
+float gauss_weight(float p_val) {
+	// Linearly interpolated lookup into gauss_table: p_val in [0, 1) spans the table, anything at or beyond 1 weighs 0.0.
+	float idxf;
+	float c = modf(max(0.0, p_val * float(GAUSS_TABLE_SIZE)), idxf);
+	int idx = int(idxf);
+	if (idx >= GAUSS_TABLE_SIZE) {
+		// idx + 1 must stay inside the table; its last entry is 0.0, so 0.0 is also the interpolated limit here
+		return 0.0;
+	}
+	return mix(gauss_table[idx], gauss_table[idx + 1], c);
+}
+
+#define GAUSS_WEIGHT(m_val) gauss_table[clamp(int(m_val * float(GAUSS_TABLE_SIZE - 1)), 0, GAUSS_TABLE_SIZE - 1)]
+
+#define M_PI 3.14159265359
+
+vec3 reconstructCSPosition(vec2 S, float z) {
+	// Rebuilds the view-space position of pixel coordinate S at depth z from the precomputed proj_info terms.
+	vec2 plane_xy = S * params.proj_info.xy + params.proj_info.zw;
+	// Orthogonal projections leave XY independent of depth; otherwise XY scales with z.
+	float xy_scale = params.orthogonal ? 1.0 : z;
+	return vec3(plane_xy * xy_scale, z);
+}
+
+void do_filter(inout vec4 accum, inout float accum_radius, inout float divisor, ivec2 texcoord, ivec2 increment, vec3 p_pos, vec3 normal, float p_limit_radius) {
+	// Walks from texcoord along increment, adding weighted SSR taps until an edge or the image border is met. p_limit_radius is currently unused.
+	for (int i = 1; i < params.steps; i++) {
+		float d = float(i * params.increment);
+		ivec2 tc = texcoord + increment * i;
+		if (any(lessThan(tc, ivec2(0))) || any(greaterThanEqual(tc, params.screen_size))) {
+			break; // left the image; every further tap in this direction is out of range too
+		}
+		float depth = imageLoad(source_depth, tc).r;
+		vec3 view_pos = reconstructCSPosition(vec2(tc) + 0.5, depth);
+		vec3 view_normal = normalize(imageLoad(source_normal, tc).rgb * 2.0 - 1.0);
+		view_normal.y = -view_normal.y;
+		float r = imageLoad(source_radius, tc).r;
+		float radius = round(r * 255.0);
+
+		float angle_n = 1.0 - abs(dot(normal, view_normal));
+		if (angle_n > params.edge_tolerance) {
+			break;
+		}
+
+		float angle = abs(dot(normal, normalize(view_pos - p_pos)));
+		if (angle > params.edge_tolerance) {
+			break;
+		}
+
+		float contrib = 0.0;
+		if (d < radius) {
+			contrib += gauss_weight(d / radius);
+		}
+
+		if (contrib > 0.0) {
+			accum += imageLoad(source_ssr, tc) * contrib;
+#ifndef VERTICAL_PASS
+			accum_radius += r * contrib;
+#endif
+			divisor += contrib;
+		}
+	}
+}
+
+void main() {
+	// Runs one blur pass over the SSR result: along Y when VERTICAL_PASS is defined, along X otherwise.
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+
+	if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing (valid texels are [0, screen_size))
+		return;
+	}
+
+	// seed the accumulators with the center tap, weighted by the first table entry
+	vec4 accum = imageLoad(source_ssr, ssC);
+
+	// * 255.0 undoes the / 255.0 normalization applied when the radius was written
+	float accum_radius = imageLoad(source_radius, ssC).r;
+	float radius = accum_radius * 255.0;
+
+	float divisor = gauss_table[0];
+	accum *= divisor;
+	accum_radius *= divisor;
+#ifdef VERTICAL_PASS
+	ivec2 direction = ivec2(0, params.increment);
+#else
+	ivec2 direction = ivec2(params.increment, 0);
+#endif
+	float depth = imageLoad(source_depth, ssC).r;
+	vec3 pos = reconstructCSPosition(vec2(ssC) + 0.5, depth);
+	vec3 normal = imageLoad(source_normal, ssC).xyz * 2.0 - 1.0;
+	normal = normalize(normal);
+	normal.y = -normal.y;
+
+	do_filter(accum, accum_radius, divisor, ssC, direction, pos, normal, radius);
+	do_filter(accum, accum_radius, divisor, ssC, -direction, pos, normal, radius);
+
+	if (divisor > 0.0) {
+		accum /= divisor;
+		accum_radius /= divisor;
+	} else {
+		accum = vec4(0.0);
+		accum_radius = 0.0;
+	}
+
+	imageStore(dest_ssr, ssC, accum);
+
+#ifndef VERTICAL_PASS
+	imageStore(dest_radius, ssC, vec4(accum_radius));
+#endif
+}
diff --git a/servers/rendering/rasterizer_rd/shaders/screen_space_reflection_scale.glsl b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection_scale.glsl
new file mode 100644
index 0000000000..cec6c14c76
--- /dev/null
+++ b/servers/rendering/rasterizer_rd/shaders/screen_space_reflection_scale.glsl
@@ -0,0 +1,96 @@
+/* clang-format off */
+[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+/* clang-format on */
+
+layout(set = 0, binding = 0) uniform sampler2D source_ssr; // color input, fetched at ssC << 1
+layout(set = 1, binding = 0) uniform sampler2D source_depth; // depth input; main() linearizes and negates it
+layout(set = 1, binding = 1) uniform sampler2D source_normal; // normal input stored as n * 0.5 + 0.5
+layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; // downscaled color
+layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_depth; // downscaled, negated linear depth
+layout(rgba8, set = 3, binding = 1) uniform restrict writeonly image2D dest_normal; // downscaled normal, still stored as n * 0.5 + 0.5
+
+layout(push_constant, binding = 1, std430) uniform Params {
+	// Per-dispatch parameters for the downscale pass.
+	ivec2 screen_size; // dispatch bounds in pixels of the destination images
+	float camera_z_near; // near plane distance used to linearize depth
+	float camera_z_far; // far plane distance used to linearize depth
+
+	bool orthogonal; // selects the orthogonal depth linearization
+	bool filtered; // true: average a 2x2 block of source texels; false: take a single texel
+	uint pad[2]; // not referenced by this shader
+}
+params;
+
+// Turns a raw depth sample (presumably in [0, 1] - confirm against the caller) into negated linear depth using the camera planes in params.
+float linearize_depth(float d) {
+	// remap to [-1, 1] before inverting the projection
+	d = d * 2.0 - 1.0;
+	if (params.orthogonal) {
+		// orthogonal: -1 maps to the near plane, +1 to the far plane, linearly in between
+		d = ((d + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0;
+	} else {
+		// perspective: invert the non-linear depth mapping
+		d = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - d * (params.camera_z_far - params.camera_z_near));
+	}
+	return -d;
+}
+
+void main() {
+	// Downscales color, depth and normal by two (source texel = ssC << 1), linearizing depth on the way.
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+
+	if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing (valid texels are [0, screen_size))
+		return;
+	}
+	//do not filter, SSR will generate artifacts if this is done
+
+	vec4 color;
+	float depth;
+	vec3 normal;
+
+	if (params.filtered) {
+
+		color = vec4(0.0);
+		depth = 0.0;
+		normal = vec3(0.0);
+
+		// i selects one texel of the 2x2 source block: bit 0 is the x offset, bit 1 the y offset
+		for (int i = 0; i < 4; i++) {
+
+			ivec2 ofs = ssC << 1;
+			if (bool(i & 1)) {
+				ofs.x += 1;
+			}
+			if (bool(i & 2)) {
+				ofs.y += 1;
+			}
+			color += texelFetch(source_ssr, ofs, 0);
+			normal += texelFetch(source_normal, ofs, 0).xyz * 2.0 - 1.0;
+			depth += linearize_depth(texelFetch(source_depth, ofs, 0).r);
+		}
+
+		// average the four taps; the normal is renormalized and re-encoded to [0, 1]
+		color /= 4.0;
+		depth /= 4.0;
+		normal = normalize(normal / 4.0) * 0.5 + 0.5;
+
+	} else {
+		// take only the first texel of the 2x2 block; the normal is passed through in its encoded form
+		color = texelFetch(source_ssr, ssC << 1, 0);
+		depth = linearize_depth(texelFetch(source_depth, ssC << 1, 0).r);
+		normal = texelFetch(source_normal, ssC << 1, 0).xyz;
+	}
+
+	imageStore(dest_ssr, ssC, color);
+	imageStore(dest_depth, ssC, vec4(depth));
+	imageStore(dest_normal, ssC, vec4(normal, 0.0));
+}
diff --git a/servers/rendering/rasterizer_rd/shaders/specular_merge.glsl b/servers/rendering/rasterizer_rd/shaders/specular_merge.glsl
new file mode 100644
index 0000000000..b28250318e
--- /dev/null
+++ b/servers/rendering/rasterizer_rd/shaders/specular_merge.glsl
@@ -0,0 +1,59 @@
+/* clang-format off */
+[vertex]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(location = 0) out vec2 uv_interp;
+/* clang-format on */
+
+void main() {
+	// Emits one corner of a quad covering clip space [-1, 1], picked by gl_VertexIndex, and forwards it as the UV.
+	vec2 quad_corners[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
+	vec2 corner = quad_corners[gl_VertexIndex];
+	uv_interp = corner;
+	gl_Position = vec4(corner * 2.0 - 1.0, 0.0, 1.0);
+}
+
+/* clang-format off */
+[fragment]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(location = 0) in vec2 uv_interp;
+/* clang-format on */
+
+layout(set = 0, binding = 0) uniform sampler2D specular; // base color of the output; always sampled
+
+#ifdef MODE_SSR
+
+layout(set = 1, binding = 0) uniform sampler2D ssr; // reflection color in rgb, blend weight in a
+
+#endif
+
+#ifdef MODE_MERGE
+
+layout(set = 2, binding = 0) uniform sampler2D diffuse; // added on top of the result, alpha included
+
+#endif
+
+layout(location = 0) out vec4 frag_color; // merged color written by main()
+
+void main() {
+	// Start from the specular buffer; alpha stays 0.0 unless MODE_MERGE adds the diffuse alpha.
+	vec3 merged = texture(specular, uv_interp).rgb;
+
+#ifdef MODE_SSR
+	// Blend the reflection over the specular term, weighted by the reflection's alpha.
+	vec4 reflection = texture(ssr, uv_interp);
+	merged = mix(merged, reflection.rgb, reflection.a);
+#endif
+	// Per the original note, the output is added to the target using additive blend.
+	frag_color = vec4(merged, 0.0);
+#ifdef MODE_MERGE
+	frag_color += texture(diffuse, uv_interp);
+#endif
+}