summaryrefslogtreecommitdiff
path: root/servers/rendering/renderer_rd/shaders
diff options
context:
space:
mode:
Diffstat (limited to 'servers/rendering/renderer_rd/shaders')
-rw-r--r--servers/rendering/renderer_rd/shaders/SCsub8
-rw-r--r--servers/rendering/renderer_rd/shaders/canvas.glsl18
-rw-r--r--servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl20
-rw-r--r--servers/rendering/renderer_rd/shaders/cluster_debug.glsl115
-rw-r--r--servers/rendering/renderer_rd/shaders/cluster_render.glsl168
-rw-r--r--servers/rendering/renderer_rd/shaders/cluster_store.glsl119
-rw-r--r--servers/rendering/renderer_rd/shaders/cube_to_dp.glsl45
-rw-r--r--servers/rendering/renderer_rd/shaders/gi.glsl75
-rw-r--r--servers/rendering/renderer_rd/shaders/giprobe.glsl11
-rw-r--r--servers/rendering/renderer_rd/shaders/particles.glsl2
-rw-r--r--servers/rendering/renderer_rd/shaders/resolve.glsl112
-rw-r--r--servers/rendering/renderer_rd/shaders/scene_forward.glsl1713
-rw-r--r--servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl114
-rw-r--r--servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl107
-rw-r--r--servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl160
-rw-r--r--servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl2
-rw-r--r--servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl184
-rw-r--r--servers/rendering/renderer_rd/shaders/skeleton.glsl199
-rw-r--r--servers/rendering/renderer_rd/shaders/ssao.glsl601
-rw-r--r--servers/rendering/renderer_rd/shaders/ssao_blur.glsl245
-rw-r--r--servers/rendering/renderer_rd/shaders/ssao_downsample.glsl206
-rw-r--r--servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl126
-rw-r--r--servers/rendering/renderer_rd/shaders/ssao_interleave.glsl119
-rw-r--r--servers/rendering/renderer_rd/shaders/ssao_minify.glsl45
-rw-r--r--servers/rendering/renderer_rd/shaders/volumetric_fog.glsl260
25 files changed, 3283 insertions, 1491 deletions
diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub
index 1fe43b25f6..1b0197c1c1 100644
--- a/servers/rendering/renderer_rd/shaders/SCsub
+++ b/servers/rendering/renderer_rd/shaders/SCsub
@@ -21,8 +21,10 @@ if "RD_GLSL" in env["BUILDERS"]:
env.RD_GLSL("luminance_reduce.glsl")
env.RD_GLSL("bokeh_dof.glsl")
env.RD_GLSL("ssao.glsl")
- env.RD_GLSL("ssao_minify.glsl")
+ env.RD_GLSL("ssao_downsample.glsl")
+ env.RD_GLSL("ssao_importance_map.glsl")
env.RD_GLSL("ssao_blur.glsl")
+ env.RD_GLSL("ssao_interleave.glsl")
env.RD_GLSL("roughness_limiter.glsl")
env.RD_GLSL("screen_space_reflection.glsl")
env.RD_GLSL("screen_space_reflection_filter.glsl")
@@ -41,3 +43,7 @@ if "RD_GLSL" in env["BUILDERS"]:
env.RD_GLSL("particles.glsl")
env.RD_GLSL("particles_copy.glsl")
env.RD_GLSL("sort.glsl")
+ env.RD_GLSL("skeleton.glsl")
+ env.RD_GLSL("cluster_render.glsl")
+ env.RD_GLSL("cluster_store.glsl")
+ env.RD_GLSL("cluster_debug.glsl")
diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl
index 7808e7ed52..3b39edc70e 100644
--- a/servers/rendering/renderer_rd/shaders/canvas.glsl
+++ b/servers/rendering/renderer_rd/shaders/canvas.glsl
@@ -396,7 +396,7 @@ vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv
vec4 shadow_color = unpackUnorm4x8(light_array.data[light_base].shadow_color);
#ifdef LIGHT_SHADER_CODE_USED
- shadow_color *= shadow_modulate;
+ shadow_color.rgb *= shadow_modulate;
#endif
shadow_color.a *= light_color.a; //respect light alpha
@@ -497,9 +497,9 @@ void main() {
vec2 shadow_vertex = vertex;
{
- float normal_depth = 1.0;
+ float normal_map_depth = 1.0;
-#if defined(NORMALMAP_USED)
+#if defined(NORMAL_MAP_USED)
vec3 normal_map = vec3(0.0, 0.0, 1.0);
normal_used = true;
#endif
@@ -510,8 +510,8 @@ FRAGMENT_SHADER_CODE
/* clang-format on */
-#if defined(NORMALMAP_USED)
- normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_depth);
+#if defined(NORMAL_MAP_USED)
+ normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_map_depth);
#endif
}
@@ -546,7 +546,7 @@ FRAGMENT_SHADER_CODE
#ifdef LIGHT_SHADER_CODE_USED
vec4 shadow_modulate = vec4(1.0);
- light_color = light_compute(light_vertex, direction, normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, color, uv, true);
+ light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, true);
#else
if (normal_used) {
@@ -563,7 +563,7 @@ FRAGMENT_SHADER_CODE
light_color = light_shadow_compute(light_base, light_color, shadow_uv
#ifdef LIGHT_SHADER_CODE_USED
,
- shadow_modulate
+ shadow_modulate.rgb
#endif
);
}
@@ -605,7 +605,7 @@ FRAGMENT_SHADER_CODE
vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
light_color.rgb *= light_base_color.rgb;
- light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, color, uv, false);
+ light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, false);
#else
light_color.rgb *= light_base_color.rgb * light_base_color.a;
@@ -659,7 +659,7 @@ FRAGMENT_SHADER_CODE
light_color = light_shadow_compute(light_base, light_color, shadow_uv
#ifdef LIGHT_SHADER_CODE_USED
,
- shadow_modulate
+ shadow_modulate.rgb
#endif
);
}
diff --git a/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl
index e723468dd8..3a4bf4da07 100644
--- a/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl
@@ -6,12 +6,18 @@
struct LightData { //this structure needs to be as packed as possible
vec3 position;
float inv_radius;
+
vec3 direction;
float size;
- uint attenuation_energy; //attenuation
- uint color_specular; //rgb color, a specular (8 bit unorm)
- uint cone_attenuation_angle; // attenuation and angle, (16bit float)
- uint shadow_color_enabled; //shadow rgb color, a>0.5 enabled (8bit unorm)
+
+ vec3 color;
+ float attenuation;
+
+ float cone_attenuation;
+ float cone_angle;
+ float specular_amount;
+ bool shadow_enabled;
+
vec4 atlas_rect; // rect in the shadow atlas
mat4 shadow_matrix;
float shadow_bias;
@@ -34,9 +40,13 @@ struct ReflectionData {
float index;
vec3 box_offset;
uint mask;
- vec4 params; // intensity, 0, interior , boxproject
vec3 ambient; // ambient color
+ float intensity;
+ bool exterior;
+ bool box_project;
uint ambient_mode;
+ uint pad;
+ //0-8 is intensity,8-9 is ambient, mode
mat4 local_matrix; // up to here for spot and omni, rest is for directional
// notes: for ambientblend, use distance to edge to blend between already existing global environment
};
diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl
new file mode 100644
index 0000000000..70a875192c
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl
@@ -0,0 +1,115 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32
+ vec3(0.14, 0.17, 0.23),
+ vec3(0.24, 0.44, 0.83),
+ vec3(0.23, 0.57, 0.84),
+ vec3(0.22, 0.71, 0.84),
+ vec3(0.22, 0.85, 0.83),
+ vec3(0.21, 0.85, 0.72),
+ vec3(0.21, 0.85, 0.57),
+ vec3(0.20, 0.85, 0.42),
+ vec3(0.20, 0.85, 0.27),
+ vec3(0.27, 0.86, 0.19),
+ vec3(0.51, 0.85, 0.19),
+ vec3(0.57, 0.86, 0.19),
+ vec3(0.62, 0.85, 0.19),
+ vec3(0.67, 0.86, 0.20),
+ vec3(0.73, 0.85, 0.20),
+ vec3(0.78, 0.85, 0.20),
+ vec3(0.83, 0.85, 0.20),
+ vec3(0.85, 0.82, 0.20),
+ vec3(0.85, 0.76, 0.20),
+ vec3(0.85, 0.81, 0.20),
+ vec3(0.85, 0.65, 0.20),
+ vec3(0.84, 0.60, 0.21),
+ vec3(0.84, 0.56, 0.21),
+ vec3(0.84, 0.51, 0.21),
+ vec3(0.84, 0.46, 0.21),
+ vec3(0.84, 0.41, 0.21),
+ vec3(0.84, 0.36, 0.21),
+ vec3(0.84, 0.31, 0.21),
+ vec3(0.84, 0.27, 0.21),
+ vec3(0.83, 0.22, 0.22),
+ vec3(0.83, 0.22, 0.27),
+ vec3(0.83, 0.22, 0.32),
+ vec3(1.00, 0.63, 0.70));
+layout(push_constant, binding = 0, std430) uniform Params {
+ uvec2 screen_size;
+ uvec2 cluster_screen_size;
+
+ uint cluster_shift;
+ uint cluster_type;
+ float z_near;
+ float z_far;
+
+ bool orthogonal;
+ uint max_cluster_element_count_div_32;
+ uint pad1;
+ uint pad2;
+}
+params;
+
+layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterData {
+ uint data[];
+}
+cluster_data;
+
+layout(rgba16f, set = 0, binding = 2) uniform restrict writeonly image2D screen_buffer;
+layout(set = 0, binding = 3) uniform texture2D depth_buffer;
+layout(set = 0, binding = 4) uniform sampler depth_buffer_sampler;
+
+void main() {
+ uvec2 screen_pos = gl_GlobalInvocationID.xy;
+ if (any(greaterThanEqual(screen_pos, params.screen_size))) {
+ return;
+ }
+
+ uvec2 cluster_pos = screen_pos >> params.cluster_shift;
+
+ uint offset = cluster_pos.y * params.cluster_screen_size.x + cluster_pos.x;
+ offset += params.cluster_screen_size.x * params.cluster_screen_size.y * params.cluster_type;
+ offset *= (params.max_cluster_element_count_div_32 + 32);
+
+ //depth buffers generally can't be accessed via image API
+ float depth = texelFetch(sampler2D(depth_buffer, depth_buffer_sampler), ivec2(screen_pos), 0).r * 2.0 - 1.0;
+
+ if (params.orthogonal) {
+ depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
+ } else {
+ depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near));
+ }
+ depth /= params.z_far;
+
+ uint slice = uint(clamp(floor(depth * 32.0), 0.0, 31.0));
+ uint slice_minmax = cluster_data.data[offset + params.max_cluster_element_count_div_32 + slice];
+ uint item_min = slice_minmax & 0xFFFF;
+ uint item_max = slice_minmax >> 16;
+
+ uint item_count = 0;
+ for (uint i = 0; i < params.max_cluster_element_count_div_32; i++) {
+ uint slice_bits = cluster_data.data[offset + i];
+ while (slice_bits != 0) {
+ uint bit = findLSB(slice_bits);
+ uint item = i * 32 + bit;
+ if ((item >= item_min && item < item_max)) {
+ item_count++;
+ }
+ slice_bits &= ~(1 << bit);
+ }
+ }
+
+ item_count = min(item_count, 32);
+
+ vec3 color = usage_gradient[item_count];
+
+ color = mix(color * 1.2, color * 0.3, float(slice) / 31.0);
+
+ imageStore(screen_buffer, ivec2(screen_pos), vec4(color, 1.0));
+}
diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl
new file mode 100644
index 0000000000..8723ea78e4
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl
@@ -0,0 +1,168 @@
+#[vertex]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(location = 0) in vec3 vertex_attrib;
+
+layout(location = 0) out float depth_interp;
+layout(location = 1) out flat uint element_index;
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ uint base_index;
+ uint pad0;
+ uint pad1;
+ uint pad2;
+}
+params;
+
+layout(set = 0, binding = 1, std140) uniform State {
+ mat4 projection;
+
+ float inv_z_far;
+ uint screen_to_clusters_shift; // shift to obtain coordinates in block indices
+ uint cluster_screen_width; //
+ uint cluster_data_size; // how much data for a single cluster takes
+
+ uint cluster_depth_offset;
+ uint pad0;
+ uint pad1;
+ uint pad2;
+}
+state;
+
+struct RenderElement {
+ uint type; //0-4
+ bool touches_near;
+ bool touches_far;
+ uint original_index;
+ mat3x4 transform_inv;
+ vec3 scale;
+ uint pad;
+};
+
+layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements {
+ RenderElement data[];
+}
+render_elements;
+
+void main() {
+ element_index = params.base_index + gl_InstanceIndex;
+
+ vec3 vertex = vertex_attrib;
+ vertex *= render_elements.data[element_index].scale;
+
+ vertex = vec4(vertex, 1.0) * render_elements.data[element_index].transform_inv;
+ depth_interp = -vertex.z;
+
+ gl_Position = state.projection * vec4(vertex, 1.0);
+}
+
+#[fragment]
+
+#version 450
+
+VERSION_DEFINES
+
+#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_vote)
+
+#extension GL_KHR_shader_subgroup_ballot : enable
+#extension GL_KHR_shader_subgroup_arithmetic : enable
+#extension GL_KHR_shader_subgroup_vote : enable
+
+#define USE_SUBGROUPS
+#endif
+
+layout(location = 0) in float depth_interp;
+layout(location = 1) in flat uint element_index;
+
+layout(set = 0, binding = 1, std140) uniform State {
+ mat4 projection;
+ float inv_z_far;
+ uint screen_to_clusters_shift; // shift to obtain coordinates in block indices
+ uint cluster_screen_width; //
+ uint cluster_data_size; // how much data for a single cluster takes
+ uint cluster_depth_offset;
+ uint pad0;
+ uint pad1;
+ uint pad2;
+}
+state;
+
+//cluster data is layout linearly, each cell contains the follow information:
+// - list of bits for every element to mark as used, so (max_elem_count/32)*4 uints
+// - a uint for each element to mark the depth bits used when rendering (0-31)
+
+layout(set = 0, binding = 3, std430) buffer restrict ClusterRender {
+ uint data[];
+}
+cluster_render;
+
+void main() {
+ //convert from screen to cluster
+ uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift;
+
+ //get linear cluster offset from screen poss
+ uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y;
+ //multiply by data size to position at the beginning of the element list for this cluster
+ cluster_offset *= state.cluster_data_size;
+
+ //find the current element in the list and plot the bit to mark it as used
+ uint usage_write_offset = cluster_offset + (element_index >> 5);
+ uint usage_write_bit = 1 << (element_index & 0x1F);
+
+#ifdef USE_SUBGROUPS
+
+ uint cluster_thread_group_index;
+
+ if (!gl_HelperInvocation) {
+ //http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
+
+ uvec4 mask;
+
+ while (true) {
+ // find the cluster offset of the first active thread
+ // threads that did break; go inactive and no longer count
+ uint first = subgroupBroadcastFirst(cluster_offset);
+ // update the mask for thread that match this cluster
+ mask = subgroupBallot(first == cluster_offset);
+ if (first == cluster_offset) {
+ // This thread belongs to the group of threads that match this offset,
+ // so exit the loop.
+ break;
+ }
+ }
+
+ cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask);
+
+ if (cluster_thread_group_index == 0) {
+ atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
+ }
+ }
+#else
+ if (!gl_HelperInvocation) {
+ atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
+ }
+#endif
+ //find the current element in the depth usage list and mark the current depth as used
+ float unit_depth = depth_interp * state.inv_z_far;
+
+ uint z_bit = clamp(uint(floor(unit_depth * 32.0)), 0, 31);
+
+ uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index;
+ uint z_write_bit = 1 << z_bit;
+
+#ifdef USE_SUBGROUPS
+ if (!gl_HelperInvocation) {
+ z_write_bit = subgroupOr(z_write_bit); //merge all Zs
+ if (cluster_thread_group_index == 0) {
+ atomicOr(cluster_render.data[z_write_offset], z_write_bit);
+ }
+ }
+#else
+ if (!gl_HelperInvocation) {
+ atomicOr(cluster_render.data[z_write_offset], z_write_bit);
+ }
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl
new file mode 100644
index 0000000000..5be0893c4f
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl
@@ -0,0 +1,119 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ uint cluster_render_data_size; // how much data for a single cluster takes
+ uint max_render_element_count_div_32; //divided by 32
+ uvec2 cluster_screen_size;
+ uint render_element_count_div_32; //divided by 32
+
+ uint max_cluster_element_count_div_32; //divided by 32
+ uint pad1;
+ uint pad2;
+}
+params;
+
+layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender {
+ uint data[];
+}
+cluster_render;
+
+layout(set = 0, binding = 2, std430) buffer restrict ClusterStore {
+ uint data[];
+}
+cluster_store;
+
+struct RenderElement {
+ uint type; //0-4
+ bool touches_near;
+ bool touches_far;
+ uint original_index;
+ mat3x4 transform_inv;
+ vec3 scale;
+ uint pad;
+};
+
+layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements {
+ RenderElement data[];
+}
+render_elements;
+
+void main() {
+ uvec2 pos = gl_GlobalInvocationID.xy;
+ if (any(greaterThanEqual(pos, params.cluster_screen_size))) {
+ return;
+ }
+
+ //counter for each type of render_element
+
+ //base offset for this cluster
+ uint base_offset = (pos.x + params.cluster_screen_size.x * pos.y);
+ uint src_offset = base_offset * params.cluster_render_data_size;
+
+ uint render_element_offset = 0;
+
+ //check all render_elements and see which one was written to
+ while (render_element_offset < params.render_element_count_div_32) {
+ uint bits = cluster_render.data[src_offset + render_element_offset];
+ while (bits != 0) {
+ //if bits exist, check the render_element
+ uint index_bit = findLSB(bits);
+ uint index = render_element_offset * 32 + index_bit;
+ uint type = render_elements.data[index].type;
+
+ uint z_range_offset = src_offset + params.max_render_element_count_div_32 + index;
+ uint z_range = cluster_render.data[z_range_offset];
+
+ //if object was written, z was written, but check just in case
+ if (z_range != 0) { //should always be > 0
+
+ uint from_z = findLSB(z_range);
+ uint to_z = findMSB(z_range) + 1;
+
+ if (render_elements.data[index].touches_near) {
+ from_z = 0;
+ }
+
+ if (render_elements.data[index].touches_far) {
+ to_z = 32;
+ }
+
+ // find cluster offset in the buffer used for indexing in the renderer
+ uint dst_offset = (base_offset + type * (params.cluster_screen_size.x * params.cluster_screen_size.y)) * (params.max_cluster_element_count_div_32 + 32);
+
+ uint orig_index = render_elements.data[index].original_index;
+ //store this index in the Z slices by setting the relevant bit
+ for (uint i = from_z; i < to_z; i++) {
+ uint slice_ofs = dst_offset + params.max_cluster_element_count_div_32 + i;
+
+ uint minmax = cluster_store.data[slice_ofs];
+
+ if (minmax == 0) {
+ minmax = 0xFFFF; //min 0, max 0xFFFF
+ }
+
+ uint elem_min = min(orig_index, minmax & 0xFFFF);
+ uint elem_max = max(orig_index + 1, minmax >> 16); //always store plus one, so zero means range is empty when not written to
+
+ minmax = elem_min | (elem_max << 16);
+ cluster_store.data[slice_ofs] = minmax;
+ }
+
+ uint store_word = orig_index >> 5;
+ uint store_bit = orig_index & 0x1F;
+
+ //store the actual render_element index at the end, so the rendering code can reference it
+ cluster_store.data[dst_offset + store_word] |= 1 << store_bit;
+ }
+
+ bits &= ~(1 << index_bit); //clear the bit to continue iterating
+ }
+
+ render_element_offset++;
+ }
+}
diff --git a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl
index 54d67db6c6..c3ac0bee57 100644
--- a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl
+++ b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl
@@ -1,33 +1,48 @@
-#[compute]
+#[vertex]
#version 450
VERSION_DEFINES
-layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+layout(push_constant, binding = 1, std430) uniform Params {
+ float z_far;
+ float z_near;
+ bool z_flip;
+ uint pad;
+ vec4 screen_rect;
+}
+params;
+
+layout(location = 0) out vec2 uv_interp;
+
+void main() {
+ vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
+ uv_interp = base_arr[gl_VertexIndex];
+ vec2 screen_pos = uv_interp * params.screen_rect.zw + params.screen_rect.xy;
+ gl_Position = vec4(screen_pos * 2.0 - 1.0, 0.0, 1.0);
+}
+
+#[fragment]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(location = 0) in vec2 uv_interp;
layout(set = 0, binding = 0) uniform samplerCube source_cube;
layout(push_constant, binding = 1, std430) uniform Params {
- ivec2 screen_size;
- ivec2 offset;
- float bias;
float z_far;
float z_near;
bool z_flip;
+ uint pad;
+ vec4 screen_rect;
}
params;
-layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D depth_buffer;
-
void main() {
- ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
- if (any(greaterThan(pos, params.screen_size))) { //too large, do nothing
- return;
- }
-
- vec2 pixel_size = 1.0 / vec2(params.screen_size);
- vec2 uv = (vec2(pos) + 0.5) * pixel_size;
+ vec2 uv = uv_interp;
vec3 normal = vec3(uv * 2.0 - 1.0, 0.0);
@@ -65,5 +80,5 @@ void main() {
float linear_depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near));
depth = (linear_depth * depth_fix) / params.z_far;
- imageStore(depth_buffer, pos + params.offset, vec4(depth));
+ gl_FragDepth = depth;
}
diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl
index 8011dadc72..35522103df 100644
--- a/servers/rendering/renderer_rd/shaders/gi.glsl
+++ b/servers/rendering/renderer_rd/shaders/gi.glsl
@@ -97,13 +97,12 @@ layout(push_constant, binding = 0, std430) uniform Params {
vec4 proj_info;
+ vec3 ao_color;
uint max_giprobes;
+
bool high_quality_vct;
- bool use_sdfgi;
bool orthogonal;
-
- vec3 ao_color;
- uint pad;
+ uint pad[2];
mat3x4 cam_rotation;
}
@@ -331,7 +330,7 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
}
ambient_light.rgb = diffuse;
-#if 1
+
if (roughness < 0.2) {
vec3 pos_to_uvw = 1.0 / sdfgi.grid_size;
vec4 light_accum = vec4(0.0);
@@ -363,7 +362,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
//ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion
ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell;
}
-
float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade
while (length(ray_pos) < max_distance) {
for (uint i = 0; i < sdfgi.max_cascades; i++) {
@@ -434,8 +432,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
}
}
-#endif
-
reflection_light.rgb = specular;
ambient_light.rgb *= sdfgi.energy;
@@ -597,35 +593,24 @@ vec4 fetch_normal_and_roughness(ivec2 pos) {
return normal_roughness;
}
-void main() {
- // Pixel being shaded
- ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
- if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing
- return;
- }
-
- vec3 vertex = reconstruct_position(pos);
- vertex.y = -vertex.y;
-
+void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 reflection_light) {
vec4 normal_roughness = fetch_normal_and_roughness(pos);
- vec3 normal = normal_roughness.xyz;
- vec4 ambient_light = vec4(0.0), reflection_light = vec4(0.0);
+ vec3 normal = normal_roughness.xyz;
if (normal.length() > 0.5) {
//valid normal, can do GI
float roughness = normal_roughness.w;
-
vertex = mat3(params.cam_rotation) * vertex;
normal = normalize(mat3(params.cam_rotation) * normal);
-
vec3 reflection = normalize(reflect(normalize(vertex), normal));
- if (params.use_sdfgi) {
- sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light);
- }
+#ifdef USE_SDFGI
+ sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light);
+#endif
- if (params.max_giprobes > 0) {
+#ifdef USE_GIPROBES
+ {
uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg;
roughness *= roughness;
//find arbitrary tangent and bitangent, then build a matrix
@@ -648,16 +633,40 @@ void main() {
spec_accum /= blend_accum;
}
- if (params.use_sdfgi) {
- reflection_light = blend_color(spec_accum, reflection_light);
- ambient_light = blend_color(amb_accum, ambient_light);
- } else {
- reflection_light = spec_accum;
- ambient_light = amb_accum;
- }
+#ifdef USE_SDFGI
+ reflection_light = blend_color(spec_accum, reflection_light);
+ ambient_light = blend_color(amb_accum, ambient_light);
+#else
+ reflection_light = spec_accum;
+ ambient_light = amb_accum;
+#endif
}
+#endif
+ }
+}
+
+void main() {
+ ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+
+#ifdef MODE_HALF_RES
+ pos <<= 1;
+#endif
+ if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing
+ return;
}
+ vec4 ambient_light = vec4(0.0);
+ vec4 reflection_light = vec4(0.0);
+
+ vec3 vertex = reconstruct_position(pos);
+ vertex.y = -vertex.y;
+
+ process_gi(pos, vertex, ambient_light, reflection_light);
+
+#ifdef MODE_HALF_RES
+ pos >>= 1;
+#endif
+
imageStore(ambient_buffer, pos, ambient_light);
imageStore(reflection_buffer, pos, reflection_light);
}
diff --git a/servers/rendering/renderer_rd/shaders/giprobe.glsl b/servers/rendering/renderer_rd/shaders/giprobe.glsl
index ea4237a45e..4f4753d147 100644
--- a/servers/rendering/renderer_rd/shaders/giprobe.glsl
+++ b/servers/rendering/renderer_rd/shaders/giprobe.glsl
@@ -208,6 +208,15 @@ float raymarch(float distance, float distance_adv, vec3 from, vec3 direction) {
return occlusion; //max(0.0,distance);
}
+float get_omni_attenuation(float distance, float inv_range, float decay) {
+ float nd = distance * inv_range;
+ nd *= nd;
+ nd *= nd; // nd^4
+ nd = max(1.0 - nd, 0.0);
+ nd *= nd; // nd^2
+ return nd * pow(max(distance, 0.0001), -decay);
+}
+
bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 light_pos) {
if (lights.data[light].type == LIGHT_TYPE_DIRECTIONAL) {
light_pos = pos - lights.data[light].direction * length(vec3(params.limits));
@@ -220,7 +229,7 @@ bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3
return false;
}
- attenuation = pow(clamp(1.0 - distance / lights.data[light].radius, 0.0001, 1.0), lights.data[light].attenuation);
+ attenuation = get_omni_attenuation(distance, 1.0 / lights.data[light].radius, lights.data[light].attenuation);
if (lights.data[light].type == LIGHT_TYPE_SPOT) {
vec3 rel = normalize(pos - light_pos);
diff --git a/servers/rendering/renderer_rd/shaders/particles.glsl b/servers/rendering/renderer_rd/shaders/particles.glsl
index 926c7ef9fc..cb6d8dc7f6 100644
--- a/servers/rendering/renderer_rd/shaders/particles.glsl
+++ b/servers/rendering/renderer_rd/shaders/particles.glsl
@@ -173,7 +173,7 @@ uint hash(uint x) {
return x;
}
-bool emit_particle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) {
+bool emit_subparticle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) {
if (!params.can_emit) {
return false;
}
diff --git a/servers/rendering/renderer_rd/shaders/resolve.glsl b/servers/rendering/renderer_rd/shaders/resolve.glsl
index 9429a66dc9..e83c4ca93b 100644
--- a/servers/rendering/renderer_rd/shaders/resolve.glsl
+++ b/servers/rendering/renderer_rd/shaders/resolve.glsl
@@ -58,6 +58,116 @@ void main() {
#else
+#if 1
+
+ vec4 group1;
+ vec4 group2;
+ vec4 group3;
+ vec4 group4;
+ int best_index = 0;
+
+ //2X
+ group1.x = texelFetch(source_depth, pos, 0).r;
+ group1.y = texelFetch(source_depth, pos, 1).r;
+
+ //4X
+ if (params.sample_count >= 4) {
+ group1.z = texelFetch(source_depth, pos, 2).r;
+ group1.w = texelFetch(source_depth, pos, 3).r;
+ }
+ //8X
+ if (params.sample_count >= 8) {
+ group2.x = texelFetch(source_depth, pos, 4).r;
+ group2.y = texelFetch(source_depth, pos, 5).r;
+ group2.z = texelFetch(source_depth, pos, 6).r;
+ group2.w = texelFetch(source_depth, pos, 7).r;
+ }
+ //16X
+ if (params.sample_count >= 16) {
+ group3.x = texelFetch(source_depth, pos, 8).r;
+ group3.y = texelFetch(source_depth, pos, 9).r;
+ group3.z = texelFetch(source_depth, pos, 10).r;
+ group3.w = texelFetch(source_depth, pos, 11).r;
+
+ group4.x = texelFetch(source_depth, pos, 12).r;
+ group4.y = texelFetch(source_depth, pos, 13).r;
+ group4.z = texelFetch(source_depth, pos, 14).r;
+ group4.w = texelFetch(source_depth, pos, 15).r;
+ }
+
+ if (params.sample_count == 2) {
+ best_index = (pos.x & 1) ^ ((pos.y >> 1) & 1); //not much can be done here
+ } else if (params.sample_count == 4) {
+ vec4 freq = vec4(equal(group1, vec4(group1.x)));
+ freq += vec4(equal(group1, vec4(group1.y)));
+ freq += vec4(equal(group1, vec4(group1.z)));
+ freq += vec4(equal(group1, vec4(group1.w)));
+
+ float min_f = freq.x;
+ best_index = 0;
+ if (freq.y < min_f) {
+ best_index = 1;
+ min_f = freq.y;
+ }
+ if (freq.z < min_f) {
+ best_index = 2;
+ min_f = freq.z;
+ }
+ if (freq.w < min_f) {
+ best_index = 3;
+ }
+ } else if (params.sample_count == 8) {
+ vec4 freq0 = vec4(equal(group1, vec4(group1.x)));
+ vec4 freq1 = vec4(equal(group2, vec4(group1.x)));
+ freq0 += vec4(equal(group1, vec4(group1.y)));
+ freq1 += vec4(equal(group2, vec4(group1.y)));
+ freq0 += vec4(equal(group1, vec4(group1.z)));
+ freq1 += vec4(equal(group2, vec4(group1.z)));
+ freq0 += vec4(equal(group1, vec4(group1.w)));
+ freq1 += vec4(equal(group2, vec4(group1.w)));
+ freq0 += vec4(equal(group1, vec4(group2.x)));
+ freq1 += vec4(equal(group2, vec4(group2.x)));
+ freq0 += vec4(equal(group1, vec4(group2.y)));
+ freq1 += vec4(equal(group2, vec4(group2.y)));
+ freq0 += vec4(equal(group1, vec4(group2.z)));
+ freq1 += vec4(equal(group2, vec4(group2.z)));
+ freq0 += vec4(equal(group1, vec4(group2.w)));
+ freq1 += vec4(equal(group2, vec4(group2.w)));
+
+ float min_f0 = freq0.x;
+ int best_index0 = 0;
+ if (freq0.y < min_f0) {
+ best_index0 = 1;
+ min_f0 = freq0.y;
+ }
+ if (freq0.z < min_f0) {
+ best_index0 = 2;
+ min_f0 = freq0.z;
+ }
+ if (freq0.w < min_f0) {
+ best_index0 = 3;
+ min_f0 = freq0.w;
+ }
+
+ float min_f1 = freq1.x;
+ int best_index1 = 4;
+ if (freq1.y < min_f1) {
+ best_index1 = 5;
+ min_f1 = freq1.y;
+ }
+ if (freq1.z < min_f1) {
+ best_index1 = 6;
+ min_f1 = freq1.z;
+ }
+ if (freq1.w < min_f1) {
+ best_index1 = 7;
+ min_f1 = freq1.w;
+ }
+
+ best_index = mix(best_index0, best_index1, min_f0 < min_f1);
+ }
+
+#else
float depths[16];
int depth_indices[16];
int depth_amount[16];
@@ -91,7 +201,7 @@ void main() {
depth_least = depth_amount[j];
}
}
-
+#endif
best_depth = texelFetch(source_depth, pos, best_index).r;
best_normal_roughness = texelFetch(source_normal_roughness, pos, best_index);
#ifdef GIPROBE_RESOLVE
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl
index 5b01cb1f82..ea203c8abe 100644
--- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl
@@ -9,8 +9,14 @@ VERSION_DEFINES
/* INPUT ATTRIBS */
layout(location = 0) in vec3 vertex_attrib;
+
+//only for pure render depth when normal is not used
+
+#ifdef NORMAL_USED
layout(location = 1) in vec3 normal_attrib;
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#endif
+
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
layout(location = 2) in vec4 tangent_attrib;
#endif
@@ -18,7 +24,9 @@ layout(location = 2) in vec4 tangent_attrib;
layout(location = 3) in vec4 color_attrib;
#endif
+#ifdef UV_USED
layout(location = 4) in vec2 uv_attrib;
+#endif
#if defined(UV2_USED) || defined(USE_LIGHTMAP) || defined(MODE_RENDER_MATERIAL)
layout(location = 5) in vec2 uv2_attrib;
@@ -51,19 +59,24 @@ layout(location = 11) in vec4 weight_attrib;
/* Varyings */
layout(location = 0) out vec3 vertex_interp;
+
+#ifdef NORMAL_USED
layout(location = 1) out vec3 normal_interp;
+#endif
#if defined(COLOR_USED)
layout(location = 2) out vec4 color_interp;
#endif
+#ifdef UV_USED
layout(location = 3) out vec2 uv_interp;
+#endif
#if defined(UV2_USED) || defined(USE_LIGHTMAP)
layout(location = 4) out vec2 uv2_interp;
#endif
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
layout(location = 5) out vec3 tangent_interp;
layout(location = 6) out vec3 binormal_interp;
#endif
@@ -84,8 +97,6 @@ VERTEX_SHADER_GLOBALS
invariant gl_Position;
-layout(location = 7) flat out uint instance_index;
-
#ifdef MODE_DUAL_PARABOLOID
layout(location = 8) out float dp_clip;
@@ -93,22 +104,27 @@ layout(location = 8) out float dp_clip;
#endif
void main() {
- instance_index = draw_call.instance_index;
vec4 instance_custom = vec4(0.0);
#if defined(COLOR_USED)
color_interp = color_attrib;
#endif
- mat4 world_matrix = instances.data[instance_index].transform;
- mat3 world_normal_matrix = mat3(instances.data[instance_index].normal_transform);
+ mat4 world_matrix = draw_call.transform;
+
+ mat3 world_normal_matrix;
+ if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) {
+ world_normal_matrix = inverse(mat3(world_matrix));
+ } else {
+ world_normal_matrix = mat3(world_matrix);
+ }
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH)) {
+ if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH)) {
//multimesh, instances are for it
- uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK;
+ uint offset = (draw_call.flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK;
offset *= gl_InstanceIndex;
mat4 matrix;
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) {
+ if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) {
matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
offset += 2;
} else {
@@ -116,14 +132,14 @@ void main() {
offset += 3;
}
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) {
+ if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) {
#ifdef COLOR_USED
color_interp *= transforms.data[offset];
#endif
offset += 1;
}
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) {
+ if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) {
instance_custom = transforms.data[offset];
}
@@ -131,23 +147,21 @@ void main() {
matrix = transpose(matrix);
world_matrix = world_matrix * matrix;
world_normal_matrix = world_normal_matrix * mat3(matrix);
-
- } else {
- //not a multimesh, instances are for multiple draw calls
- instance_index += gl_InstanceIndex;
}
vec3 vertex = vertex_attrib;
+#ifdef NORMAL_USED
vec3 normal = normal_attrib * 2.0 - 1.0;
+#endif
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
vec3 tangent = tangent_attrib.xyz * 2.0 - 1.0;
float binormalf = tangent_attrib.a * 2.0 - 1.0;
vec3 binormal = normalize(cross(normal, tangent) * binormalf);
#endif
#if 0
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) {
+ if (bool(draw_call.flags & INSTANCE_FLAGS_SKELETON)) {
//multimesh, instances are for it
uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3;
@@ -164,14 +178,17 @@ void main() {
vertex = (vec4(vertex, 1.0) * m).xyz;
normal = (vec4(normal, 0.0) * m).xyz;
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
tangent = (vec4(tangent, 0.0) * m).xyz;
binormal = (vec4(binormal, 0.0) * m).xyz;
#endif
}
#endif
+
+#ifdef UV_USED
uv_interp = uv_attrib;
+#endif
#if defined(UV2_USED) || defined(USE_LIGHTMAP)
uv2_interp = uv2_attrib;
@@ -190,7 +207,7 @@ void main() {
normal = world_normal_matrix * normal;
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
tangent = world_normal_matrix * tangent;
binormal = world_normal_matrix * binormal;
@@ -215,10 +232,13 @@ VERTEX_SHADER_CODE
#if !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED)
vertex = (modelview * vec4(vertex, 1.0)).xyz;
+#ifdef NORMAL_USED
normal = modelview_normal * normal;
#endif
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#endif
+
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
binormal = modelview_normal * binormal;
tangent = modelview_normal * tangent;
@@ -230,7 +250,7 @@ VERTEX_SHADER_CODE
vertex = (scene_data.inv_camera_matrix * vec4(vertex, 1.0)).xyz;
normal = mat3(scene_data.inverse_normal_matrix) * normal;
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
binormal = mat3(scene_data.camera_inverse_binormal_matrix) * binormal;
tangent = mat3(scene_data.camera_inverse_tangent_matrix) * tangent;
@@ -238,9 +258,11 @@ VERTEX_SHADER_CODE
#endif
vertex_interp = vertex;
+#ifdef NORMAL_USED
normal_interp = normal;
+#endif
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
tangent_interp = tangent;
binormal_interp = binormal;
#endif
@@ -250,7 +272,6 @@ VERTEX_SHADER_CODE
#ifdef MODE_DUAL_PARABOLOID
vertex_interp.z *= scene_data.dual_paraboloid_side;
- normal_interp.z *= scene_data.dual_paraboloid_side;
dp_clip = vertex_interp.z; //this attempts to avoid noise caused by objects sent to the other parabolloid side due to bias
@@ -283,7 +304,7 @@ VERTEX_SHADER_CODE
#endif
#ifdef MODE_RENDER_MATERIAL
if (scene_data.material_uv2_mode) {
- gl_Position.xy = (uv2_attrib.xy + draw_call.bake_uv2_offset) * 2.0 - 1.0;
+ gl_Position.xy = (uv2_attrib.xy + draw_call.lightmap_uv_scale.xy) * 2.0 - 1.0;
gl_Position.z = 0.00001;
gl_Position.w = 1.0;
}
@@ -301,25 +322,28 @@ VERSION_DEFINES
/* Varyings */
layout(location = 0) in vec3 vertex_interp;
+
+#ifdef NORMAL_USED
layout(location = 1) in vec3 normal_interp;
+#endif
#if defined(COLOR_USED)
layout(location = 2) in vec4 color_interp;
#endif
+#ifdef UV_USED
layout(location = 3) in vec2 uv_interp;
+#endif
#if defined(UV2_USED) || defined(USE_LIGHTMAP)
layout(location = 4) in vec2 uv2_interp;
#endif
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
layout(location = 5) in vec3 tangent_interp;
layout(location = 6) in vec3 binormal_interp;
#endif
-layout(location = 7) flat in uint instance_index;
-
#ifdef MODE_DUAL_PARABOLOID
layout(location = 8) in float dp_clip;
@@ -328,8 +352,7 @@ layout(location = 8) in float dp_clip;
//defines to keep compatibility with vertex
-#define world_matrix instances.data[instance_index].transform
-#define world_normal_matrix instances.data[instance_index].normal_transform
+#define world_matrix draw_call.transform
#define projection_matrix scene_data.projection_matrix
#if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE)
@@ -518,7 +541,7 @@ vec3 F0(float metallic, float specular, vec3 albedo) {
return mix(vec3(dielectric), albedo, vec3(metallic));
}
-void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float attenuation, vec3 shadow_attenuation, vec3 diffuse_color, float roughness, float metallic, float specular, float specular_blob_intensity,
+void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
vec3 backlight,
#endif
@@ -530,7 +553,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte
float transmittance_z,
#endif
#ifdef LIGHT_RIM_USED
- float rim, float rim_tint,
+ float rim, float rim_tint, vec3 rim_color,
#endif
#ifdef LIGHT_CLEARCOAT_USED
float clearcoat, float clearcoat_gloss,
@@ -538,6 +561,9 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte
#ifdef LIGHT_ANISOTROPY_USED
vec3 B, vec3 T, float anisotropy,
#endif
+#ifdef USE_SOFT_SHADOWS
+ float A,
+#endif
#ifdef USE_SHADOW_TO_OPACITY
inout float alpha,
#endif
@@ -547,7 +573,6 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte
// light is written by the light shader
vec3 normal = N;
- vec3 albedo = diffuse_color;
vec3 light = L;
vec3 view = V;
@@ -558,7 +583,12 @@ LIGHT_SHADER_CODE
/* clang-format on */
#else
+
+#ifdef USE_SOFT_SHADOWS
float NdotL = min(A + dot(N, L), 1.0);
+#else
+ float NdotL = dot(N, L);
+#endif
float cNdotL = max(NdotL, 0.0); // clamped NdotL
float NdotV = dot(N, V);
float cNdotV = max(NdotV, 0.0);
@@ -568,14 +598,25 @@ LIGHT_SHADER_CODE
#endif
#if defined(SPECULAR_BLINN) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED)
+#ifdef USE_SOFT_SHADOWS
float cNdotH = clamp(A + dot(N, H), 0.0, 1.0);
+#else
+ float cNdotH = clamp(dot(N, H), 0.0, 1.0);
+#endif
#endif
#if defined(DIFFUSE_BURLEY) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED)
+#ifdef USE_SOFT_SHADOWS
float cLdotH = clamp(A + dot(L, H), 0.0, 1.0);
+#else
+ float cLdotH = clamp(dot(L, H), 0.0, 1.0);
+#endif
#endif
+ float metallic = unpackUnorm4x8(orms).z;
if (metallic < 1.0) {
+ float roughness = unpackUnorm4x8(orms).y;
+
#if defined(DIFFUSE_OREN_NAYAR)
vec3 diffuse_brdf_NL;
#else
@@ -585,23 +626,6 @@ LIGHT_SHADER_CODE
#if defined(DIFFUSE_LAMBERT_WRAP)
// energy conserving lambert wrap shader
diffuse_brdf_NL = max(0.0, (NdotL + roughness) / ((1.0 + roughness) * (1.0 + roughness)));
-
-#elif defined(DIFFUSE_OREN_NAYAR)
-
- {
- // see http://mimosa-pudica.net/improved-oren-nayar.html
- float LdotV = dot(L, V);
-
- float s = LdotV - NdotL * NdotV;
- float t = mix(1.0, max(NdotL, NdotV), step(0.0, s));
-
- float sigma2 = roughness * roughness; // TODO: this needs checking
- vec3 A = 1.0 + sigma2 * (-0.5 / (sigma2 + 0.33) + 0.17 * diffuse_color / (sigma2 + 0.13));
- float B = 0.45 * sigma2 / (sigma2 + 0.09);
-
- diffuse_brdf_NL = cNdotL * (A + vec3(B) * s / t) * (1.0 / M_PI);
- }
-
#elif defined(DIFFUSE_TOON)
diffuse_brdf_NL = smoothstep(-roughness, max(roughness, 0.01), NdotL);
@@ -629,15 +653,15 @@ LIGHT_SHADER_CODE
diffuse_brdf_NL = cNdotL * (1.0 / M_PI);
#endif
- diffuse_light += light_color * diffuse_color * shadow_attenuation * diffuse_brdf_NL * attenuation;
+ diffuse_light += light_color * diffuse_brdf_NL * attenuation;
#if defined(LIGHT_BACKLIGHT_USED)
- diffuse_light += light_color * diffuse_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation;
+ diffuse_light += light_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation;
#endif
#if defined(LIGHT_RIM_USED)
float rim_light = pow(max(0.0, 1.0 - cNdotV), max(0.0, (1.0 - roughness) * 16.0));
- diffuse_light += rim_light * rim * mix(vec3(1.0), diffuse_color, rim_tint) * light_color;
+ diffuse_light += rim_light * rim * mix(vec3(1.0), rim_color, rim_tint) * light_color;
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
@@ -655,7 +679,7 @@ LIGHT_SHADER_CODE
vec3(0.358, 0.004, 0.0) * exp(dd / 1.99) +
vec3(0.078, 0.0, 0.0) * exp(dd / 7.41);
- diffuse_light += profile * transmittance_color.a * diffuse_color * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI) * attenuation;
+ diffuse_light += profile * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI);
}
#else
@@ -665,7 +689,7 @@ LIGHT_SHADER_CODE
fade = pow(max(0.0, 1.0 - fade), transmittance_curve);
fade *= clamp(transmittance_boost - NdotL, 0.0, 1.0);
- diffuse_light += diffuse_color * transmittance_color.rgb * light_color * (1.0 / M_PI) * transmittance_color.a * fade * attenuation;
+ diffuse_light += transmittance_color.rgb * light_color * (1.0 / M_PI) * transmittance_color.a * fade;
}
#endif //SSS_MODE_SKIN
@@ -673,6 +697,7 @@ LIGHT_SHADER_CODE
#endif //LIGHT_TRANSMITTANCE_USED
}
+ float roughness = unpackUnorm4x8(orms).y;
if (roughness > 0.0) { // FIXME: roughness == 0 should not disable specular light entirely
// D
@@ -685,7 +710,7 @@ LIGHT_SHADER_CODE
blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI));
float intensity = blinn;
- specular_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation;
+ specular_light += light_color * intensity * attenuation * specular_amount;
#elif defined(SPECULAR_PHONG)
@@ -696,7 +721,7 @@ LIGHT_SHADER_CODE
phong *= (shininess + 8.0) * (1.0 / (8.0 * M_PI));
float intensity = (phong) / max(4.0 * cNdotV * cNdotL, 0.75);
- specular_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation;
+ specular_light += light_color * intensity * attenuation * specular_amount;
#elif defined(SPECULAR_TOON)
@@ -705,7 +730,7 @@ LIGHT_SHADER_CODE
float mid = 1.0 - roughness;
mid *= mid;
float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid;
- diffuse_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation; // write to diffuse_light, as in toon shading you generally want no reflection
+ diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection
#elif defined(SPECULAR_DISABLED)
// none..
@@ -730,13 +755,12 @@ LIGHT_SHADER_CODE
float G = G_GGX_2cos(cNdotL, alpha_ggx) * G_GGX_2cos(cNdotV, alpha_ggx);
#endif
// F
- vec3 f0 = F0(metallic, specular, diffuse_color);
float cLdotH5 = SchlickFresnel(cLdotH);
vec3 F = mix(vec3(cLdotH5), vec3(1.0), f0);
vec3 specular_brdf_NL = cNdotL * D * F * G;
- specular_light += specular_brdf_NL * light_color * shadow_attenuation * specular_blob_intensity * attenuation;
+ specular_light += specular_brdf_NL * light_color * attenuation * specular_amount;
#endif
#if defined(LIGHT_CLEARCOAT_USED)
@@ -750,12 +774,12 @@ LIGHT_SHADER_CODE
float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL;
- specular_light += clearcoat_specular_brdf_NL * light_color * shadow_attenuation * specular_blob_intensity * attenuation;
+ specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount;
#endif
}
#ifdef USE_SHADOW_TO_OPACITY
- alpha = min(alpha, clamp(1.0 - length(shadow_attenuation * attenuation), 0.0, 1.0));
+ alpha = min(alpha, clamp(1.0 - attenuation), 0.0, 1.0));
#endif
#endif //defined(USE_LIGHT_SHADER_CODE)
@@ -868,69 +892,39 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex
#endif //USE_NO_SHADOWS
-void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 albedo, float roughness, float metallic, float specular, float p_blob_intensity,
-#ifdef LIGHT_BACKLIGHT_USED
- vec3 backlight,
-#endif
-#ifdef LIGHT_TRANSMITTANCE_USED
- vec4 transmittance_color,
- float transmittance_depth,
- float transmittance_curve,
- float transmittance_boost,
-#endif
-#ifdef LIGHT_RIM_USED
- float rim, float rim_tint,
-#endif
-#ifdef LIGHT_CLEARCOAT_USED
- float clearcoat, float clearcoat_gloss,
-#endif
-#ifdef LIGHT_ANISOTROPY_USED
- vec3 binormal, vec3 tangent, float anisotropy,
-#endif
-#ifdef USE_SHADOW_TO_OPACITY
- inout float alpha,
-#endif
- inout vec3 diffuse_light, inout vec3 specular_light) {
- vec3 light_rel_vec = lights.data[idx].position - vertex;
- float light_length = length(light_rel_vec);
- float normalized_distance = light_length * lights.data[idx].inv_radius;
- vec2 attenuation_energy = unpackHalf2x16(lights.data[idx].attenuation_energy);
- float omni_attenuation = pow(max(1.0 - normalized_distance, 0.0), attenuation_energy.x);
- float light_attenuation = omni_attenuation;
- vec3 shadow_attenuation = vec3(1.0);
- vec4 color_specular = unpackUnorm4x8(lights.data[idx].color_specular);
- color_specular.rgb *= attenuation_energy.y;
- float size_A = 0.0;
-
- if (lights.data[idx].size > 0.0) {
- float t = lights.data[idx].size / max(0.001, light_length);
- size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
- }
-
-#ifdef LIGHT_TRANSMITTANCE_USED
- float transmittance_z = transmittance_depth; //no transmittance by default
-#endif
+float get_omni_attenuation(float distance, float inv_range, float decay) {
+ float nd = distance * inv_range;
+ nd *= nd;
+ nd *= nd; // nd^4
+ nd = max(1.0 - nd, 0.0);
+ nd *= nd; // nd^2
+ return nd * pow(max(distance, 0.0001), -decay);
+}
+float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
#ifndef USE_NO_SHADOWS
- vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[idx].shadow_color_enabled);
- if (shadow_color_enabled.w > 0.5) {
+ if (omni_lights.data[idx].shadow_enabled) {
// there is a shadowmap
+ vec3 light_rel_vec = omni_lights.data[idx].position - vertex;
+ float light_length = length(light_rel_vec);
+
vec4 v = vec4(vertex, 1.0);
- vec4 splane = (lights.data[idx].shadow_matrix * v);
+ vec4 splane = (omni_lights.data[idx].shadow_matrix * v);
float shadow_len = length(splane.xyz); //need to remember shadow len from here
{
- vec3 nofs = normal_interp * lights.data[idx].shadow_normal_bias / lights.data[idx].inv_radius;
+ vec3 nofs = normal_interp * omni_lights.data[idx].shadow_normal_bias / omni_lights.data[idx].inv_radius;
nofs *= (1.0 - max(0.0, dot(normalize(light_rel_vec), normalize(normal_interp))));
v.xyz += nofs;
- splane = (lights.data[idx].shadow_matrix * v);
+ splane = (omni_lights.data[idx].shadow_matrix * v);
}
float shadow;
- if (lights.data[idx].soft_shadow_size > 0.0) {
+#ifdef USE_SOFT_SHADOWS
+ if (omni_lights.data[idx].soft_shadow_size > 0.0) {
//soft shadow
//find blocker
@@ -950,10 +944,10 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
vec3 tangent = normalize(cross(v0, normal));
vec3 bitangent = normalize(cross(tangent, normal));
- float z_norm = shadow_len * lights.data[idx].inv_radius;
+ float z_norm = shadow_len * omni_lights.data[idx].inv_radius;
- tangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale;
- bitangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale;
+ tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale;
+ bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy;
@@ -961,7 +955,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y;
pos = normalize(pos);
- vec4 uv_rect = lights.data[idx].atlas_rect;
+ vec4 uv_rect = omni_lights.data[idx].atlas_rect;
if (pos.z >= 0.0) {
pos.z += 1.0;
@@ -989,7 +983,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
tangent *= penumbra;
bitangent *= penumbra;
- z_norm -= lights.data[idx].inv_radius * lights.data[idx].shadow_bias;
+ z_norm -= omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias;
shadow = 0.0;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
@@ -997,7 +991,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y;
pos = normalize(pos);
- vec4 uv_rect = lights.data[idx].atlas_rect;
+ vec4 uv_rect = omni_lights.data[idx].atlas_rect;
if (pos.z >= 0.0) {
pos.z += 1.0;
@@ -1020,8 +1014,9 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
shadow = 1.0;
}
} else {
+#endif
splane.xyz = normalize(splane.xyz);
- vec4 clamp_rect = lights.data[idx].atlas_rect;
+ vec4 clamp_rect = omni_lights.data[idx].atlas_rect;
if (splane.z >= 0.0) {
splane.z += 1.0;
@@ -1035,101 +1030,149 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
splane.xy /= splane.z;
splane.xy = splane.xy * 0.5 + 0.5;
- splane.z = (shadow_len - lights.data[idx].shadow_bias) * lights.data[idx].inv_radius;
+ splane.z = (shadow_len - omni_lights.data[idx].shadow_bias) * omni_lights.data[idx].inv_radius;
splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
splane.w = 1.0; //needed? i think it should be 1 already
- shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane);
+ shadow = sample_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane);
+#ifdef USE_SOFT_SHADOWS
}
+#endif
+ return shadow;
+ }
+#endif
+
+ return 1.0;
+}
+
+void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow,
+#ifdef LIGHT_BACKLIGHT_USED
+ vec3 backlight,
+#endif
#ifdef LIGHT_TRANSMITTANCE_USED
- {
- vec4 clamp_rect = lights.data[idx].atlas_rect;
+ vec4 transmittance_color,
+ float transmittance_depth,
+ float transmittance_curve,
+ float transmittance_boost,
+#endif
+#ifdef LIGHT_RIM_USED
+ float rim, float rim_tint, vec3 rim_color,
+#endif
+#ifdef LIGHT_CLEARCOAT_USED
+ float clearcoat, float clearcoat_gloss,
+#endif
+#ifdef LIGHT_ANISOTROPY_USED
+ vec3 binormal, vec3 tangent, float anisotropy,
+#endif
+#ifdef USE_SHADOW_TO_OPACITY
+ inout float alpha,
+#endif
+ inout vec3 diffuse_light, inout vec3 specular_light) {
+ vec3 light_rel_vec = omni_lights.data[idx].position - vertex;
+ float light_length = length(light_rel_vec);
+ float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation);
+ float light_attenuation = omni_attenuation;
+ vec3 color = omni_lights.data[idx].color;
- //redo shadowmapping, but shrink the model a bit to avoid arctifacts
- splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0));
+#ifdef USE_SOFT_SHADOWS
+ float size_A = 0.0;
- shadow_len = length(splane.xyz);
- splane = normalize(splane.xyz);
+ if (omni_lights.data[idx].size > 0.0) {
+ float t = omni_lights.data[idx].size / max(0.001, light_length);
+ size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
+ }
+#endif
- if (splane.z >= 0.0) {
- splane.z += 1.0;
+#ifdef LIGHT_TRANSMITTANCE_USED
+ float transmittance_z = transmittance_depth; //no transmittance by default
+ transmittance_color.a *= light_attenuation;
+ {
+ vec4 clamp_rect = omni_lights.data[idx].atlas_rect;
- } else {
- splane.z = 1.0 - splane.z;
- }
+ //redo shadowmapping, but shrink the model a bit to avoid arctifacts
+ vec4 splane = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * omni_lights.data[idx].transmittance_bias, 1.0));
- splane.xy /= splane.z;
- splane.xy = splane.xy * 0.5 + 0.5;
- splane.z = shadow_len * lights.data[idx].inv_radius;
- splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
- splane.w = 1.0; //needed? i think it should be 1 already
+ shadow_len = length(splane.xyz);
+ splane = normalize(splane.xyz);
- float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
- transmittance_z = (splane.z - shadow_z) / lights.data[idx].inv_radius;
+ if (splane.z >= 0.0) {
+ splane.z += 1.0;
+
+ } else {
+ splane.z = 1.0 - splane.z;
}
-#endif
- vec3 no_shadow = vec3(1.0);
+ splane.xy /= splane.z;
+ splane.xy = splane.xy * 0.5 + 0.5;
+ splane.z = shadow_len * omni_lights.data[idx].inv_radius;
+ splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
+ splane.w = 1.0; //needed? i think it should be 1 already
- if (lights.data[idx].projector_rect != vec4(0.0)) {
- vec3 local_v = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz;
- local_v = normalize(local_v);
+ float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
+ transmittance_z = (splane.z - shadow_z) / omni_lights.data[idx].inv_radius;
+ }
+#endif
- vec4 atlas_rect = lights.data[idx].projector_rect;
+#if 0
- if (local_v.z >= 0.0) {
- local_v.z += 1.0;
- atlas_rect.y += atlas_rect.w;
+ if (omni_lights.data[idx].projector_rect != vec4(0.0)) {
+ vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz;
+ local_v = normalize(local_v);
- } else {
- local_v.z = 1.0 - local_v.z;
- }
+ vec4 atlas_rect = omni_lights.data[idx].projector_rect;
- local_v.xy /= local_v.z;
- local_v.xy = local_v.xy * 0.5 + 0.5;
- vec2 proj_uv = local_v.xy * atlas_rect.zw;
+ if (local_v.z >= 0.0) {
+ local_v.z += 1.0;
+ atlas_rect.y += atlas_rect.w;
- vec2 proj_uv_ddx;
- vec2 proj_uv_ddy;
- {
- vec3 local_v_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz;
- local_v_ddx = normalize(local_v_ddx);
+ } else {
+ local_v.z = 1.0 - local_v.z;
+ }
- if (local_v_ddx.z >= 0.0) {
- local_v_ddx.z += 1.0;
- } else {
- local_v_ddx.z = 1.0 - local_v_ddx.z;
- }
+ local_v.xy /= local_v.z;
+ local_v.xy = local_v.xy * 0.5 + 0.5;
+ vec2 proj_uv = local_v.xy * atlas_rect.zw;
- local_v_ddx.xy /= local_v_ddx.z;
- local_v_ddx.xy = local_v_ddx.xy * 0.5 + 0.5;
+ vec2 proj_uv_ddx;
+ vec2 proj_uv_ddy;
+ {
+ vec3 local_v_ddx = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz;
+ local_v_ddx = normalize(local_v_ddx);
- proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv;
+ if (local_v_ddx.z >= 0.0) {
+ local_v_ddx.z += 1.0;
+ } else {
+ local_v_ddx.z = 1.0 - local_v_ddx.z;
+ }
- vec3 local_v_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz;
- local_v_ddy = normalize(local_v_ddy);
+ local_v_ddx.xy /= local_v_ddx.z;
+ local_v_ddx.xy = local_v_ddx.xy * 0.5 + 0.5;
- if (local_v_ddy.z >= 0.0) {
- local_v_ddy.z += 1.0;
- } else {
- local_v_ddy.z = 1.0 - local_v_ddy.z;
- }
+ proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv;
- local_v_ddy.xy /= local_v_ddy.z;
- local_v_ddy.xy = local_v_ddy.xy * 0.5 + 0.5;
+ vec3 local_v_ddy = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz;
+ local_v_ddy = normalize(local_v_ddy);
- proj_uv_ddy = local_v_ddy.xy * atlas_rect.zw - proj_uv;
+ if (local_v_ddy.z >= 0.0) {
+ local_v_ddy.z += 1.0;
+ } else {
+ local_v_ddy.z = 1.0 - local_v_ddy.z;
}
- vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + atlas_rect.xy, proj_uv_ddx, proj_uv_ddy);
- no_shadow = mix(no_shadow, proj.rgb, proj.a);
+ local_v_ddy.xy /= local_v_ddy.z;
+ local_v_ddy.xy = local_v_ddy.xy * 0.5 + 0.5;
+
+ proj_uv_ddy = local_v_ddy.xy * atlas_rect.zw - proj_uv;
}
- shadow_attenuation = mix(shadow_color_enabled.rgb, no_shadow, shadow);
+ vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + atlas_rect.xy, proj_uv_ddx, proj_uv_ddy);
+ no_shadow = mix(no_shadow, proj.rgb, proj.a);
}
-#endif //USE_NO_SHADOWS
+#endif
+
+ light_attenuation *= shadow;
- light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color_specular.rgb, light_attenuation, shadow_attenuation, albedo, roughness, metallic, specular, color_specular.a * p_blob_intensity,
+ light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@@ -1141,7 +1184,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
transmittance_z,
#endif
#ifdef LIGHT_RIM_USED
- rim * omni_attenuation, rim_tint,
+ rim * omni_attenuation, rim_tint, rim_color,
#endif
#ifdef LIGHT_CLEARCOAT_USED
clearcoat, clearcoat_gloss,
@@ -1149,6 +1192,9 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
#ifdef LIGHT_ANISOTROPY_USED
binormal, tangent, anisotropy,
#endif
+#ifdef USE_SOFT_SHADOWS
+ size_A,
+#endif
#ifdef USE_SHADOW_TO_OPACITY
alpha,
#endif
@@ -1156,89 +1202,39 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
specular_light);
}
-void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 albedo, float roughness, float metallic, float specular, float p_blob_intensity,
-#ifdef LIGHT_BACKLIGHT_USED
- vec3 backlight,
-#endif
-#ifdef LIGHT_TRANSMITTANCE_USED
- vec4 transmittance_color,
- float transmittance_depth,
- float transmittance_curve,
- float transmittance_boost,
-#endif
-#ifdef LIGHT_RIM_USED
- float rim, float rim_tint,
-#endif
-#ifdef LIGHT_CLEARCOAT_USED
- float clearcoat, float clearcoat_gloss,
-#endif
-#ifdef LIGHT_ANISOTROPY_USED
- vec3 binormal, vec3 tangent, float anisotropy,
-#endif
-#ifdef USE_SHADOW_TO_OPACITY
- inout float alpha,
-#endif
- inout vec3 diffuse_light,
- inout vec3 specular_light) {
- vec3 light_rel_vec = lights.data[idx].position - vertex;
- float light_length = length(light_rel_vec);
- float normalized_distance = light_length * lights.data[idx].inv_radius;
- vec2 attenuation_energy = unpackHalf2x16(lights.data[idx].attenuation_energy);
- float spot_attenuation = pow(max(1.0 - normalized_distance, 0.001), attenuation_energy.x);
- vec3 spot_dir = lights.data[idx].direction;
- vec2 spot_att_angle = unpackHalf2x16(lights.data[idx].cone_attenuation_angle);
- float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y);
- float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y));
- spot_attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x);
- float light_attenuation = spot_attenuation;
- vec3 shadow_attenuation = vec3(1.0);
- vec4 color_specular = unpackUnorm4x8(lights.data[idx].color_specular);
- color_specular.rgb *= attenuation_energy.y;
-
- float size_A = 0.0;
-
- if (lights.data[idx].size > 0.0) {
- float t = lights.data[idx].size / max(0.001, light_length);
- size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
- }
-/*
- if (lights.data[idx].atlas_rect!=vec4(0.0)) {
- //use projector texture
- }
- */
-#ifdef LIGHT_TRANSMITTANCE_USED
- float transmittance_z = transmittance_depth;
-#endif
-
+float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
#ifndef USE_NO_SHADOWS
- vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[idx].shadow_color_enabled);
- if (shadow_color_enabled.w > 0.5) {
+ if (spot_lights.data[idx].shadow_enabled) {
+ vec3 light_rel_vec = spot_lights.data[idx].position - vertex;
+ float light_length = length(light_rel_vec);
+ vec3 spot_dir = spot_lights.data[idx].direction;
//there is a shadowmap
vec4 v = vec4(vertex, 1.0);
- v.xyz -= spot_dir * lights.data[idx].shadow_bias;
+ v.xyz -= spot_dir * spot_lights.data[idx].shadow_bias;
- float z_norm = dot(spot_dir, -light_rel_vec) * lights.data[idx].inv_radius;
+ float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius;
float depth_bias_scale = 1.0 / (max(0.0001, z_norm)); //the closer to the light origin, the more you have to offset to reach 1px in the map
- vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * lights.data[idx].shadow_normal_bias * depth_bias_scale;
+ vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * spot_lights.data[idx].shadow_normal_bias * depth_bias_scale;
normal_bias -= spot_dir * dot(spot_dir, normal_bias); //only XY, no Z
v.xyz += normal_bias;
//adjust with bias
- z_norm = dot(spot_dir, v.xyz - lights.data[idx].position) * lights.data[idx].inv_radius;
+ z_norm = dot(spot_dir, v.xyz - spot_lights.data[idx].position) * spot_lights.data[idx].inv_radius;
float shadow;
- vec4 splane = (lights.data[idx].shadow_matrix * v);
+ vec4 splane = (spot_lights.data[idx].shadow_matrix * v);
splane /= splane.w;
- if (lights.data[idx].soft_shadow_size > 0.0) {
+#ifdef USE_SOFT_SHADOWS
+ if (spot_lights.data[idx].soft_shadow_size > 0.0) {
//soft shadow
//find blocker
- vec2 shadow_uv = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy;
+ vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy;
float blocker_count = 0.0;
float blocker_average = 0.0;
@@ -1251,11 +1247,11 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr));
}
- float uv_size = lights.data[idx].soft_shadow_size * z_norm * lights.data[idx].soft_shadow_scale;
- vec2 clamp_max = lights.data[idx].atlas_rect.xy + lights.data[idx].atlas_rect.zw;
+ float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale;
+ vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size;
- suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max);
+ suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max);
float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r;
if (d < z_norm) {
blocker_average += d;
@@ -1272,7 +1268,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
shadow = 0.0;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size;
- suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max);
+ suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max);
shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, z_norm, 1.0));
}
@@ -1284,54 +1280,93 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
}
} else {
+#endif
//hard shadow
- vec4 shadow_uv = vec4(splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy, z_norm, 1.0);
+ vec4 shadow_uv = vec4(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, splane.z, 1.0);
- shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv);
+ shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv);
+#ifdef USE_SOFT_SHADOWS
}
+#endif
- vec3 no_shadow = vec3(1.0);
+ return shadow;
+ }
- if (lights.data[idx].projector_rect != vec4(0.0)) {
- splane = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0));
- splane /= splane.w;
+#endif //USE_NO_SHADOWS
- vec2 proj_uv = splane.xy * lights.data[idx].projector_rect.zw;
+ return 1.0;
+}
- //ensure we have proper mipmaps
- vec4 splane_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0));
- splane_ddx /= splane_ddx.w;
- vec2 proj_uv_ddx = splane_ddx.xy * lights.data[idx].projector_rect.zw - proj_uv;
+void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow,
+#ifdef LIGHT_BACKLIGHT_USED
+ vec3 backlight,
+#endif
+#ifdef LIGHT_TRANSMITTANCE_USED
+ vec4 transmittance_color,
+ float transmittance_depth,
+ float transmittance_curve,
+ float transmittance_boost,
+#endif
+#ifdef LIGHT_RIM_USED
+ float rim, float rim_tint, vec3 rim_color,
+#endif
+#ifdef LIGHT_CLEARCOAT_USED
+ float clearcoat, float clearcoat_gloss,
+#endif
+#ifdef LIGHT_ANISOTROPY_USED
+ vec3 binormal, vec3 tangent, float anisotropy,
+#endif
+#ifdef USE_SHADOW_TO_OPACITY
+ inout float alpha,
+#endif
+ inout vec3 diffuse_light,
+ inout vec3 specular_light) {
+ vec3 light_rel_vec = spot_lights.data[idx].position - vertex;
+ float light_length = length(light_rel_vec);
+ float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation);
+ vec3 spot_dir = spot_lights.data[idx].direction;
+ float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[idx].cone_angle);
+ float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[idx].cone_angle));
+ spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation);
+ float light_attenuation = spot_attenuation;
+ vec3 color = spot_lights.data[idx].color;
+ float specular_amount = spot_lights.data[idx].specular_amount;
- vec4 splane_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0));
- splane_ddy /= splane_ddy.w;
- vec2 proj_uv_ddy = splane_ddy.xy * lights.data[idx].projector_rect.zw - proj_uv;
+#ifdef USE_SOFT_SHADOWS
+ float size_A = 0.0;
- vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + lights.data[idx].projector_rect.xy, proj_uv_ddx, proj_uv_ddy);
- no_shadow = mix(no_shadow, proj.rgb, proj.a);
- }
+ if (spot_lights.data[idx].size > 0.0) {
+ float t = spot_lights.data[idx].size / max(0.001, light_length);
+ size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
+ }
+#endif
- shadow_attenuation = mix(shadow_color_enabled.rgb, no_shadow, shadow);
+ /*
+ if (spot_lights.data[idx].atlas_rect!=vec4(0.0)) {
+ //use projector texture
+ }
+ */
#ifdef LIGHT_TRANSMITTANCE_USED
- {
- splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0));
- splane /= splane.w;
- splane.xy = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy;
-
- float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
- //reconstruct depth
- shadow_z /= lights.data[idx].inv_radius;
- //distance to light plane
- float z = dot(spot_dir, -light_rel_vec);
- transmittance_z = z - shadow_z;
- }
-#endif //LIGHT_TRANSMITTANCE_USED
+ float transmittance_z = transmittance_depth;
+ transmittance_color.a *= light_attenuation;
+ {
+ splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * spot_lights.data[idx].transmittance_bias, 1.0));
+ splane /= splane.w;
+ splane.xy = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy;
+
+ float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
+ //reconstruct depth
+ shadow_z /= spot_lights.data[idx].inv_radius;
+ //distance to light plane
+ float z = dot(spot_dir, -light_rel_vec);
+ transmittance_z = z - shadow_z;
}
+#endif //LIGHT_TRANSMITTANCE_USED
-#endif //USE_NO_SHADOWS
+ light_attenuation *= shadow;
- light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color_specular.rgb, light_attenuation, shadow_attenuation, albedo, roughness, metallic, specular, color_specular.a * p_blob_intensity,
+ light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@@ -1343,7 +1378,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
transmittance_z,
#endif
#ifdef LIGHT_RIM_USED
- rim * spot_attenuation, rim_tint,
+ rim * spot_attenuation, rim_tint, rim_color,
#endif
#ifdef LIGHT_CLEARCOAT_USED
clearcoat, clearcoat_gloss,
@@ -1351,6 +1386,9 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
#ifdef LIGHT_ANISOTROPY_USED
binormal, tangent, anisotropy,
#endif
+#ifdef USE_SOFT_SHADOW
+ size_A,
+#endif
#ifdef USE_SHADOW_TO_OPACITY
alpha,
#endif
@@ -1374,11 +1412,11 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes
blend *= blend;
blend = max(0.0, 1.0 - blend);
- if (reflections.data[ref_index].params.x > 0.0) { // compute reflection
+ if (reflections.data[ref_index].intensity > 0.0) { // compute reflection
vec3 local_ref_vec = (reflections.data[ref_index].local_matrix * vec4(ref_vec, 0.0)).xyz;
- if (reflections.data[ref_index].params.w > 0.5) { //box project
+ if (reflections.data[ref_index].box_project) { //box project
vec3 nrdir = normalize(local_ref_vec);
vec3 rbmax = (box_extents - local_pos) / nrdir;
@@ -1395,11 +1433,11 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes
reflection.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_ref_vec, reflections.data[ref_index].index), roughness * MAX_ROUGHNESS_LOD).rgb;
- if (reflections.data[ref_index].params.z < 0.5) {
+ if (reflections.data[ref_index].exterior) {
reflection.rgb = mix(specular_light, reflection.rgb, blend);
}
- reflection.rgb *= reflections.data[ref_index].params.x;
+ reflection.rgb *= reflections.data[ref_index].intensity; //intensity
reflection.a = blend;
reflection.rgb *= reflection.a;
@@ -1418,7 +1456,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes
ambient_out.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb;
ambient_out.a = blend;
- if (reflections.data[ref_index].params.z < 0.5) { //interior
+ if (reflections.data[ref_index].exterior) {
ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend);
}
@@ -1429,7 +1467,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes
vec4 ambient_out;
ambient_out.a = blend;
ambient_out.rgb = reflections.data[ref_index].ambient;
- if (reflections.data[ref_index].params.z < 0.5) {
+ if (reflections.data[ref_index].exterior) {
ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend);
}
ambient_out.rgb *= ambient_out.a;
@@ -1747,7 +1785,43 @@ vec4 fog_process(vec3 vertex) {
return vec4(fog_color, fog_amount);
}
+void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) {
+ uint item_min_max = cluster_buffer.data[p_offset];
+ item_min = item_min_max & 0xFFFF;
+ item_max = item_min_max >> 16;
+ ;
+
+ item_from = item_min >> 5;
+ item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements
+}
+
+uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) {
+ int local_min = clamp(int(z_min) - int(i) * 32, 0, 31);
+ int mask_width = min(int(z_max) - int(z_min), 32 - local_min);
+ return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width);
+}
+
+float blur_shadow(float shadow) {
+ return shadow;
+#if 0
+ //disabling for now, will investigate later
+ float interp_shadow = shadow;
+ if (gl_HelperInvocation) {
+ interp_shadow = -4.0; // technically anything below -4 will do but just to make sure
+ }
+
+ uvec2 fc2 = uvec2(gl_FragCoord.xy);
+ interp_shadow -= dFdx(interp_shadow) * (float(fc2.x & 1) - 0.5);
+ interp_shadow -= dFdy(interp_shadow) * (float(fc2.y & 1) - 0.5);
+
+ if (interp_shadow >= 0.0) {
+ shadow = interp_shadow;
+ }
+ return shadow;
#endif
+}
+
+#endif //!MODE_RENDER DEPTH
void main() {
#ifdef MODE_DUAL_PARABOLOID
@@ -1775,9 +1849,7 @@ void main() {
float clearcoat_gloss = 0.0;
float anisotropy = 0.0;
vec2 anisotropy_flow = vec2(1.0, 0.0);
-#if defined(CUSTOM_FOG_USED)
- vec4 custom_fog = vec4(0.0);
-#endif
+ vec4 fog = vec4(0.0);
#if defined(CUSTOM_RADIANCE_USED)
vec4 custom_radiance = vec4(0.0);
#endif
@@ -1785,20 +1857,20 @@ void main() {
vec4 custom_irradiance = vec4(0.0);
#endif
-#if defined(AO_USED)
float ao = 1.0;
float ao_light_affect = 0.0;
-#endif
float alpha = 1.0;
-#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED)
+#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
vec3 binormal = normalize(binormal_interp);
vec3 tangent = normalize(tangent_interp);
#else
vec3 binormal = vec3(0.0);
vec3 tangent = vec3(0.0);
#endif
+
+#ifdef NORMAL_USED
vec3 normal = normalize(normal_interp);
#if defined(DO_SIDE_CHECK)
@@ -1807,7 +1879,11 @@ void main() {
}
#endif
+#endif //NORMAL_USED
+
+#ifdef UV_USED
vec2 uv = uv_interp;
+#endif
#if defined(UV2_USED) || defined(USE_LIGHTMAP)
vec2 uv2 = uv2_interp;
@@ -1817,12 +1893,12 @@ void main() {
vec4 color = color_interp;
#endif
-#if defined(NORMALMAP_USED)
+#if defined(NORMAL_MAP_USED)
- vec3 normalmap = vec3(0.5);
+ vec3 normal_map = vec3(0.5);
#endif
- float normaldepth = 1.0;
+ float normal_map_depth = 1.0;
vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size + scene_data.screen_pixel_size * 0.5; //account for center
@@ -1893,12 +1969,12 @@ FRAGMENT_SHADER_CODE
#endif // !USE_SHADOW_TO_OPACITY
-#ifdef NORMALMAP_USED
+#ifdef NORMAL_MAP_USED
- normalmap.xy = normalmap.xy * 2.0 - 1.0;
- normalmap.z = sqrt(max(0.0, 1.0 - dot(normalmap.xy, normalmap.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc.
+ normal_map.xy = normal_map.xy * 2.0 - 1.0;
+ normal_map.z = sqrt(max(0.0, 1.0 - dot(normal_map.xy, normal_map.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc.
- normal = normalize(mix(normal, tangent * normalmap.x + binormal * normalmap.y + normal * normalmap.z, normaldepth));
+ normal = normalize(mix(normal, tangent * normal_map.x + binormal * normal_map.y + normal * normal_map.z, normal_map_depth));
#endif
@@ -1920,80 +1996,151 @@ FRAGMENT_SHADER_CODE
discard;
}
#endif
+
+ /////////////////////// FOG //////////////////////
+#ifndef MODE_RENDER_DEPTH
+
+#ifndef CUSTOM_FOG_USED
+ // fog must be processed as early as possible and then packed.
+ // to maximize VGPR usage
+ // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky.
+
+ if (scene_data.fog_enabled) {
+ fog = fog_process(vertex);
+ }
+
+#ifndef LOW_END_MODE
+ if (scene_data.volumetric_fog_enabled) {
+ vec4 volumetric_fog = volumetric_fog_process(screen_uv, -vertex.z);
+ if (scene_data.fog_enabled) {
+ //must use the full blending equation here to blend fogs
+ vec4 res;
+ float sa = 1.0 - volumetric_fog.a;
+ res.a = fog.a * sa + volumetric_fog.a;
+ if (res.a == 0.0) {
+ res.rgb = vec3(0.0);
+ } else {
+ res.rgb = (fog.rgb * fog.a * sa + volumetric_fog.rgb * volumetric_fog.a) / res.a;
+ }
+ fog = res;
+ } else {
+ fog = volumetric_fog;
+ }
+ }
+#endif //!LOW_END_MODE
+#endif //!CUSTOM_FOG_USED
+
+ uint fog_rg = packHalf2x16(fog.rg);
+ uint fog_ba = packHalf2x16(fog.ba);
+
+#endif //!MODE_RENDER_DEPTH
+
/////////////////////// DECALS ////////////////////////////////
#ifndef MODE_RENDER_DEPTH
- uvec4 cluster_cell = texture(usampler3D(cluster_texture, material_samplers[SAMPLER_NEAREST_CLAMP]), vec3(screen_uv, (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)));
+ uvec2 cluster_pos = uvec2(gl_FragCoord.xy) >> scene_data.cluster_shift;
+ uint cluster_offset = (scene_data.cluster_width * cluster_pos.y + cluster_pos.x) * (scene_data.max_cluster_element_count_div_32 + 32);
+
+ uint cluster_z = uint(clamp((-vertex.z / scene_data.z_far) * 32.0, 0.0, 31.0));
+
//used for interpolating anything cluster related
vec3 vertex_ddx = dFdx(vertex);
vec3 vertex_ddy = dFdy(vertex);
{ // process decals
- uint decal_count = cluster_cell.w >> CLUSTER_COUNTER_SHIFT;
- uint decal_pointer = cluster_cell.w & CLUSTER_POINTER_MASK;
+ uint cluster_decal_offset = cluster_offset + scene_data.cluster_type_size * 2;
- //do outside for performance and avoiding arctifacts
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
- for (uint i = 0; i < decal_count; i++) {
- uint decal_index = cluster_data.indices[decal_pointer + i];
- if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) {
- continue; //not masked
- }
+ cluster_get_item_range(cluster_decal_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
- vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz;
- if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) {
- continue; //out of decal
- }
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- //we need ddx/ddy for mipmaps, so simulate them
- vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz;
- vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz;
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_decal_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade);
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint decal_index = 32 * i + bit;
- if (decals.data[decal_index].normal_fade > 0.0) {
- fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5);
- }
+ if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
- if (decals.data[decal_index].albedo_rect != vec4(0.0)) {
- //has albedo
- vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw);
- decal_albedo *= decals.data[decal_index].modulate;
- decal_albedo.a *= fade;
- albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix);
-
- if (decals.data[decal_index].normal_rect != vec4(0.0)) {
- vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz;
- decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software
- decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy)));
- //convert to view space, use xzy because y is up
- decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz;
-
- normal = normalize(mix(normal, decal_normal, decal_albedo.a));
+ vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz;
+ if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) {
+ continue; //out of decal
}
- if (decals.data[decal_index].orm_rect != vec4(0.0)) {
- vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz;
-#if defined(AO_USED)
- ao = mix(ao, decal_orm.r, decal_albedo.a);
-#endif
- roughness = mix(roughness, decal_orm.g, decal_albedo.a);
- metallic = mix(metallic, decal_orm.b, decal_albedo.a);
+ //we need ddx/ddy for mipmaps, so simulate them
+ vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz;
+ vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz;
+
+ float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade);
+
+ if (decals.data[decal_index].normal_fade > 0.0) {
+ fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5);
}
- }
- if (decals.data[decal_index].emission_rect != vec4(0.0)) {
- //emission is additive, so its independent from albedo
- emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade;
+ if (decals.data[decal_index].albedo_rect != vec4(0.0)) {
+ //has albedo
+ vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw);
+ decal_albedo *= decals.data[decal_index].modulate;
+ decal_albedo.a *= fade;
+ albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix);
+
+ if (decals.data[decal_index].normal_rect != vec4(0.0)) {
+ vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz;
+ decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software
+ decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy)));
+ //convert to view space, use xzy because y is up
+ decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz;
+
+ normal = normalize(mix(normal, decal_normal, decal_albedo.a));
+ }
+
+ if (decals.data[decal_index].orm_rect != vec4(0.0)) {
+ vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz;
+ ao = mix(ao, decal_orm.r, decal_albedo.a);
+ roughness = mix(roughness, decal_orm.g, decal_albedo.a);
+ metallic = mix(metallic, decal_orm.b, decal_albedo.a);
+ }
+ }
+
+ if (decals.data[decal_index].emission_rect != vec4(0.0)) {
+ //emission is additive, so its independent from albedo
+ emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade;
+ }
}
}
}
+ //pack albedo until needed again, saves 2 VGPRs in the meantime
+
#endif //not render depth
/////////////////////// LIGHTING //////////////////////////////
+#ifdef NORMAL_USED
if (scene_data.roughness_limiter_enabled) {
//http://www.jp.square-enix.com/tech/library/pdf/ImprovedGeometricSpecularAA.pdf
float roughness2 = roughness * roughness;
@@ -2003,6 +2150,7 @@ FRAGMENT_SHADER_CODE
float filteredRoughness2 = min(1.0, roughness2 + kernelRoughness2);
roughness = sqrt(filteredRoughness2);
}
+#endif
//apply energy conservation
vec3 specular_light = vec3(0.0, 0.0, 0.0);
@@ -2056,19 +2204,14 @@ FRAGMENT_SHADER_CODE
//radiance
- float specular_blob_intensity = 1.0;
-
-#if defined(SPECULAR_TOON)
- specular_blob_intensity *= specular * 2.0;
-#endif
-
+/// GI ///
#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED)
#ifdef USE_LIGHTMAP
//lightmap
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture
- uint index = instances.data[instance_index].gi_offset;
+ if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture
+ uint index = draw_call.gi_offset;
vec3 wnormal = mat3(scene_data.camera_matrix) * normal;
const float c1 = 0.429043;
@@ -2087,12 +2230,12 @@ FRAGMENT_SHADER_CODE
2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y +
2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z);
- } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap
- bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP);
- uint ofs = instances.data[instance_index].gi_offset & 0xFFF;
+ } else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap
+ bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP);
+ uint ofs = draw_call.gi_offset & 0xFFFF;
vec3 uvw;
- uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy;
- uvw.z = float((instances.data[instance_index].gi_offset >> 12) & 0xFF);
+ uvw.xy = uv2 * draw_call.lightmap_uv_scale.zw + draw_call.lightmap_uv_scale.xy;
+ uvw.z = float((draw_call.gi_offset >> 16) & 0xFFFF);
if (uses_sh) {
uvw.z *= 4.0; //SH textures use 4 times more data
@@ -2101,7 +2244,7 @@ FRAGMENT_SHADER_CODE
vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb;
vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb;
- uint idx = instances.data[instance_index].gi_offset >> 20;
+ uint idx = draw_call.gi_offset >> 20;
vec3 n = normalize(lightmaps.data[idx].normal_xform * normal);
ambient_light += lm_light_l0 * 0.282095f;
@@ -2121,7 +2264,7 @@ FRAGMENT_SHADER_CODE
}
#elif defined(USE_FORWARD_GI)
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture
+ if (bool(draw_call.flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture
//make vertex orientation the world one, but still align to camera
vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex;
@@ -2193,9 +2336,9 @@ FRAGMENT_SHADER_CODE
}
}
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes
+ if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes
- uint index1 = instances.data[instance_index].gi_offset & 0xFFFF;
+ uint index1 = draw_call.gi_offset & 0xFFFF;
vec3 ref_vec = normalize(reflect(normalize(vertex), normal));
//find arbitrary tangent and bitangent, then build a matrix
vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
@@ -2207,7 +2350,7 @@ FRAGMENT_SHADER_CODE
vec4 spec_accum = vec4(0.0);
gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum);
- uint index2 = instances.data[instance_index].gi_offset >> 16;
+ uint index2 = draw_call.gi_offset >> 16;
if (index2 != 0xFFFF) {
gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum);
@@ -2226,19 +2369,19 @@ FRAGMENT_SHADER_CODE
}
#elif !defined(LOW_END_MODE)
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers
+ if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers
- ivec2 coord;
+ vec2 coord;
if (scene_data.gi_upscale_for_msaa) {
- ivec2 base_coord = ivec2(gl_FragCoord.xy);
- ivec2 closest_coord = base_coord;
- float closest_ang = dot(normal, texelFetch(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0).xyz * 2.0 - 1.0);
+ vec2 base_coord = screen_uv;
+ vec2 closest_coord = base_coord;
+ float closest_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0.0).xyz * 2.0 - 1.0);
for (int i = 0; i < 4; i++) {
- const ivec2 neighbours[4] = ivec2[](ivec2(-1, 0), ivec2(1, 0), ivec2(0, -1), ivec2(0, 1));
- ivec2 neighbour_coord = base_coord + neighbours[i];
- float neighbour_ang = dot(normal, texelFetch(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0).xyz * 2.0 - 1.0);
+ const vec2 neighbours[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1));
+ vec2 neighbour_coord = base_coord + neighbours[i] * scene_data.screen_pixel_size;
+ float neighbour_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0.0).xyz * 2.0 - 1.0);
if (neighbour_ang > closest_ang) {
closest_ang = neighbour_ang;
closest_coord = neighbour_coord;
@@ -2248,28 +2391,69 @@ FRAGMENT_SHADER_CODE
coord = closest_coord;
} else {
- coord = ivec2(gl_FragCoord.xy);
+ coord = screen_uv;
}
- vec4 buffer_ambient = texelFetch(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0);
- vec4 buffer_reflection = texelFetch(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0);
+ vec4 buffer_ambient = textureLod(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0);
+ vec4 buffer_reflection = textureLod(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0);
ambient_light = mix(ambient_light, buffer_ambient.rgb, buffer_ambient.a);
specular_light = mix(specular_light, buffer_reflection.rgb, buffer_reflection.a);
}
#endif
+#ifndef LOW_END_MODE
+ if (scene_data.ssao_enabled) {
+ float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r;
+ ao = min(ao, ssao);
+ ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect);
+ }
+#endif //LOW_END_MODE
+
{ // process reflections
vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0);
vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0);
- uint reflection_probe_count = cluster_cell.z >> CLUSTER_COUNTER_SHIFT;
- uint reflection_probe_pointer = cluster_cell.z & CLUSTER_POINTER_MASK;
+ uint cluster_reflection_offset = cluster_offset + scene_data.cluster_type_size * 3;
+
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
+
+ cluster_get_item_range(cluster_reflection_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
+
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- for (uint i = 0; i < reflection_probe_count; i++) {
- uint ref_index = cluster_data.indices[reflection_probe_pointer + i];
- reflection_process(ref_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum);
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_reflection_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
+
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint reflection_index = 32 * i + bit;
+
+ if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
+
+ reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum);
+ }
}
if (reflection_accum.a > 0.0) {
@@ -2283,6 +2467,16 @@ FRAGMENT_SHADER_CODE
#endif
}
+ //finalize ambient light here
+ ambient_light *= albedo.rgb;
+ ambient_light *= ao;
+
+ // convert ao to direct light ao
+ ao = mix(1.0, ao, ao_light_affect);
+
+ //this saves some VGPRs
+ vec3 f0 = F0(metallic, specular, albedo);
+
{
#if defined(DIFFUSE_TOON)
//simplify for toon, as
@@ -2300,24 +2494,39 @@ FRAGMENT_SHADER_CODE
float a004 = min(r.x * r.x, exp2(-9.28 * ndotv)) * r.x + r.y;
vec2 env = vec2(-1.04, 1.04) * a004 + r.zw;
- vec3 f0 = F0(metallic, specular, albedo);
specular_light *= env.x * f0 + env.y;
#endif
}
+#endif //GI !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED)
+
+#if !defined(MODE_RENDER_DEPTH)
+ //this saves some VGPRs
+ uint orms = packUnorm4x8(vec4(ao, roughness, metallic, specular));
+#endif
+
+// LIGHTING
+#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED)
+
{ //directional light
- for (uint i = 0; i < scene_data.directional_light_count; i++) {
- if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) {
- continue; //not masked
+ // Do shadow and lighting in two passes to reduce register pressure
+ uint shadow0 = 0;
+ uint shadow1 = 0;
+
+ for (uint i = 0; i < 8; i++) {
+ if (i >= scene_data.directional_light_count) {
+ break;
}
- vec3 shadow_attenuation = vec3(1.0);
+ if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
-#ifdef LIGHT_TRANSMITTANCE_USED
- float transmittance_z = transmittance_depth;
-#endif
+ float shadow = 1.0;
+#ifdef USE_SOFT_SHADOWS
+ //version with soft shadows, more expensive
if (directional_lights.data[i].shadow_enabled) {
float depth_z = -vertex.z;
@@ -2331,8 +2540,6 @@ FRAGMENT_SHADER_CODE
normal_bias -= light_dir * dot(light_dir, normal_bias); \
m_var.xyz += normal_bias;
- float shadow = 0.0;
-
if (depth_z < directional_lights.data[i].shadow_split_offsets.x) {
vec4 v = vec4(vertex, 1.0);
@@ -2353,19 +2560,6 @@ FRAGMENT_SHADER_CODE
shadow_color = directional_lights.data[i].shadow_color1.rgb;
-#ifdef LIGHT_TRANSMITTANCE_USED
- {
- vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0);
- vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex;
- trans_coord /= trans_coord.w;
-
- float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
- shadow_z *= directional_lights.data[i].shadow_z_range.x;
- float z = trans_coord.z * directional_lights.data[i].shadow_z_range.x;
-
- transmittance_z = z - shadow_z;
- }
-#endif
} else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) {
vec4 v = vec4(vertex, 1.0);
@@ -2385,19 +2579,6 @@ FRAGMENT_SHADER_CODE
}
shadow_color = directional_lights.data[i].shadow_color2.rgb;
-#ifdef LIGHT_TRANSMITTANCE_USED
- {
- vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0);
- vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex;
- trans_coord /= trans_coord.w;
-
- float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
- shadow_z *= directional_lights.data[i].shadow_z_range.y;
- float z = trans_coord.z * directional_lights.data[i].shadow_z_range.y;
-
- transmittance_z = z - shadow_z;
- }
-#endif
} else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) {
vec4 v = vec4(vertex, 1.0);
@@ -2417,19 +2598,6 @@ FRAGMENT_SHADER_CODE
}
shadow_color = directional_lights.data[i].shadow_color3.rgb;
-#ifdef LIGHT_TRANSMITTANCE_USED
- {
- vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0);
- vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex;
- trans_coord /= trans_coord.w;
-
- float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
- shadow_z *= directional_lights.data[i].shadow_z_range.z;
- float z = trans_coord.z * directional_lights.data[i].shadow_z_range.z;
-
- transmittance_z = z - shadow_z;
- }
-#endif
} else {
vec4 v = vec4(vertex, 1.0);
@@ -2450,20 +2618,6 @@ FRAGMENT_SHADER_CODE
}
shadow_color = directional_lights.data[i].shadow_color4.rgb;
-
-#ifdef LIGHT_TRANSMITTANCE_USED
- {
- vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0);
- vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex;
- trans_coord /= trans_coord.w;
-
- float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
- shadow_z *= directional_lights.data[i].shadow_z_range.w;
- float z = trans_coord.z * directional_lights.data[i].shadow_z_range.w;
-
- transmittance_z = z - shadow_z;
- }
-#endif
}
if (directional_lights.data[i].blend_splits) {
@@ -2537,130 +2691,407 @@ FRAGMENT_SHADER_CODE
shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance
- shadow_attenuation = mix(shadow_color, vec3(1.0), shadow);
+#undef BIAS_FUNC
+ }
+#else
+ // Soft shadow disabled version
+
+ if (directional_lights.data[i].shadow_enabled) {
+ float depth_z = -vertex.z;
+
+ vec4 pssm_coord;
+ vec3 light_dir = directional_lights.data[i].direction;
+ vec3 base_normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp))));
+
+#define BIAS_FUNC(m_var, m_idx) \
+ m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \
+ vec3 normal_bias = base_normal_bias * directional_lights.data[i].shadow_normal_bias[m_idx]; \
+ normal_bias -= light_dir * dot(light_dir, normal_bias); \
+ m_var.xyz += normal_bias;
+
+ if (depth_z < directional_lights.data[i].shadow_split_offsets.x) {
+ vec4 v = vec4(vertex, 1.0);
+
+ BIAS_FUNC(v, 0)
+
+ pssm_coord = (directional_lights.data[i].shadow_matrix1 * v);
+#ifdef LIGHT_TRANSMITTANCE_USED
+ {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x;
+
+ transmittance_z = z - shadow_z;
+ }
+#endif
+ } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) {
+ vec4 v = vec4(vertex, 1.0);
+
+ BIAS_FUNC(v, 1)
+
+ pssm_coord = (directional_lights.data[i].shadow_matrix2 * v);
+#ifdef LIGHT_TRANSMITTANCE_USED
+ {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y;
+
+ transmittance_z = z - shadow_z;
+ }
+#endif
+ } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) {
+ vec4 v = vec4(vertex, 1.0);
+
+ BIAS_FUNC(v, 2)
+
+ pssm_coord = (directional_lights.data[i].shadow_matrix3 * v);
+#ifdef LIGHT_TRANSMITTANCE_USED
+ {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z;
+
+ transmittance_z = z - shadow_z;
+ }
+#endif
+
+ } else {
+ vec4 v = vec4(vertex, 1.0);
+
+ BIAS_FUNC(v, 3)
+
+ pssm_coord = (directional_lights.data[i].shadow_matrix4 * v);
+#ifdef LIGHT_TRANSMITTANCE_USED
+ {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w;
+
+ transmittance_z = z - shadow_z;
+ }
+#endif
+ }
+
+ pssm_coord /= pssm_coord.w;
+
+ shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord);
+
+ if (directional_lights.data[i].blend_splits) {
+ float pssm_blend;
+
+ if (depth_z < directional_lights.data[i].shadow_split_offsets.x) {
+ vec4 v = vec4(vertex, 1.0);
+ BIAS_FUNC(v, 1)
+ pssm_coord = (directional_lights.data[i].shadow_matrix2 * v);
+ pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z);
+ } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) {
+ vec4 v = vec4(vertex, 1.0);
+ BIAS_FUNC(v, 2)
+ pssm_coord = (directional_lights.data[i].shadow_matrix3 * v);
+ pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z);
+ } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) {
+ vec4 v = vec4(vertex, 1.0);
+ BIAS_FUNC(v, 3)
+ pssm_coord = (directional_lights.data[i].shadow_matrix4 * v);
+ pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z);
+ } else {
+ pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached)
+ }
+
+ pssm_coord /= pssm_coord.w;
+
+ float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord);
+ shadow = mix(shadow, shadow2, pssm_blend);
+ }
+
+ shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance
#undef BIAS_FUNC
}
+#endif
+
+ if (i < 4) {
+ shadow0 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << (i * 8);
+ } else {
+ shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8);
+ }
+ }
+
+ for (uint i = 0; i < 8; i++) {
+ if (i >= scene_data.directional_light_count) {
+ break;
+ }
+
+ if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
+
+#ifdef LIGHT_TRANSMITTANCE_USED
+ float transmittance_z = transmittance_depth;
+
+ if (directional_lights.data[i].shadow_enabled) {
+ float depth_z = -vertex.z;
+
+ if (depth_z < directional_lights.data[i].shadow_split_offsets.x) {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x;
- light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].size, directional_lights.data[i].color * directional_lights.data[i].energy, 1.0, shadow_attenuation, albedo, roughness, metallic, specular, directional_lights.data[i].specular * specular_blob_intensity,
+ transmittance_z = z - shadow_z;
+ } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y;
+
+ transmittance_z = z - shadow_z;
+ } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z;
+
+ transmittance_z = z - shadow_z;
+
+ } else {
+ vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0);
+ vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex;
+ trans_coord /= trans_coord.w;
+
+ float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r;
+ shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w;
+ float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w;
+
+ transmittance_z = z - shadow_z;
+ }
+#endif
+
+ float shadow = 1.0;
+
+ if (i < 4) {
+ shadow = float(shadow0 >> (i * 8) & 0xFF) / 255.0;
+ } else {
+ shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0;
+ }
+
+ blur_shadow(shadow);
+
+ light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0,
#ifdef LIGHT_BACKLIGHT_USED
- backlight,
+ backlight,
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
- transmittance_color,
- transmittance_depth,
- transmittance_curve,
- transmittance_boost,
- transmittance_z,
+ transmittance_color,
+ transmittance_depth,
+ transmittance_curve,
+ transmittance_boost,
+ transmittance_z,
#endif
#ifdef LIGHT_RIM_USED
- rim, rim_tint,
+ rim, rim_tint, albedo,
#endif
#ifdef LIGHT_CLEARCOAT_USED
- clearcoat, clearcoat_gloss,
+ clearcoat, clearcoat_gloss,
#endif
#ifdef LIGHT_ANISOTROPY_USED
- binormal, tangent, anisotropy,
+ binormal, tangent, anisotropy,
+#endif
+#ifdef USE_SOFT_SHADOW
+ directional_lights.data[i].size,
#endif
#ifdef USE_SHADOW_TO_OPACITY
- alpha,
+ alpha,
#endif
- diffuse_light,
- specular_light);
+ diffuse_light,
+ specular_light);
+ }
}
- }
- { //omni lights
+ { //omni lights
- uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT;
- uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK;
+ uint cluster_omni_offset = cluster_offset;
- for (uint i = 0; i < omni_light_count; i++) {
- uint light_index = cluster_data.indices[omni_light_pointer + i];
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
- if (!bool(lights.data[light_index].mask & instances.data[instance_index].layer_mask)) {
- continue; //not masked
- }
+ cluster_get_item_range(cluster_omni_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
+
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, albedo, roughness, metallic, specular, specular_blob_intensity,
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_omni_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
+
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint light_index = 32 * i + bit;
+
+ if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
+
+ float shadow = light_process_omni_shadow(light_index, vertex, view);
+
+ shadow = blur_shadow(shadow);
+
+ light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
#ifdef LIGHT_BACKLIGHT_USED
- backlight,
+ backlight,
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
- transmittance_color,
- transmittance_depth,
- transmittance_curve,
- transmittance_boost,
+ transmittance_color,
+ transmittance_depth,
+ transmittance_curve,
+ transmittance_boost,
#endif
#ifdef LIGHT_RIM_USED
- rim,
- rim_tint,
+ rim,
+ rim_tint,
+ albedo,
#endif
#ifdef LIGHT_CLEARCOAT_USED
- clearcoat, clearcoat_gloss,
+ clearcoat, clearcoat_gloss,
#endif
#ifdef LIGHT_ANISOTROPY_USED
- tangent, binormal, anisotropy,
+ tangent, binormal, anisotropy,
#endif
#ifdef USE_SHADOW_TO_OPACITY
- alpha,
+ alpha,
#endif
- diffuse_light, specular_light);
+ diffuse_light, specular_light);
+ }
+ }
}
- }
- { //spot lights
- uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT;
- uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK;
+ { //spot lights
- for (uint i = 0; i < spot_light_count; i++) {
- uint light_index = cluster_data.indices[spot_light_pointer + i];
+ uint cluster_spot_offset = cluster_offset + scene_data.cluster_type_size;
- if (!bool(lights.data[light_index].mask & instances.data[instance_index].layer_mask)) {
- continue; //not masked
- }
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
+
+ cluster_get_item_range(cluster_spot_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
+
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
+
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_spot_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
+
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+
+ uint light_index = 32 * i + bit;
+
+ if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) {
+ continue; //not masked
+ }
+
+ float shadow = light_process_spot_shadow(light_index, vertex, view);
- light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, albedo, roughness, metallic, specular, specular_blob_intensity,
+ shadow = blur_shadow(shadow);
+
+ light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
#ifdef LIGHT_BACKLIGHT_USED
- backlight,
+ backlight,
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
- transmittance_color,
- transmittance_depth,
- transmittance_curve,
- transmittance_boost,
+ transmittance_color,
+ transmittance_depth,
+ transmittance_curve,
+ transmittance_boost,
#endif
#ifdef LIGHT_RIM_USED
- rim,
- rim_tint,
+ rim,
+ rim_tint,
+ albedo,
#endif
#ifdef LIGHT_CLEARCOAT_USED
- clearcoat, clearcoat_gloss,
+ clearcoat, clearcoat_gloss,
#endif
#ifdef LIGHT_ANISOTROPY_USED
- tangent, binormal, anisotropy,
+ tangent, binormal, anisotropy,
#endif
#ifdef USE_SHADOW_TO_OPACITY
- alpha,
+ alpha,
#endif
- diffuse_light, specular_light);
+ diffuse_light, specular_light);
+ }
+ }
}
- }
#ifdef USE_SHADOW_TO_OPACITY
- alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0));
+ alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0));
#if defined(ALPHA_SCISSOR_USED)
- if (alpha < alpha_scissor) {
- discard;
- }
+ if (alpha < alpha_scissor) {
+ discard;
+ }
#endif // ALPHA_SCISSOR_USED
#ifdef USE_OPAQUE_PREPASS
- if (alpha < opaque_prepass_threshold) {
- discard;
- }
+ if (alpha < opaque_prepass_threshold) {
+ discard;
+ }
#endif // USE_OPAQUE_PREPASS
@@ -2672,173 +3103,149 @@ FRAGMENT_SHADER_CODE
#ifdef MODE_RENDER_SDF
- {
- vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz;
- ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size));
-
- uint albedo16 = 0x1; //solid flag
- albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11;
- albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6;
- albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1;
-
- imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16));
-
- uint facing_bits = 0;
- const vec3 aniso_dir[6] = vec3[](
- vec3(1, 0, 0),
- vec3(0, 1, 0),
- vec3(0, 0, 1),
- vec3(-1, 0, 0),
- vec3(0, -1, 0),
- vec3(0, 0, -1));
-
- vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp);
-
- float closest_dist = -1e20;
-
- for (uint i = 0; i < 6; i++) {
- float d = dot(cam_normal, aniso_dir[i]);
- if (d > closest_dist) {
- closest_dist = d;
- facing_bits = (1 << i);
+ {
+ vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz;
+ ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size));
+
+ uint albedo16 = 0x1; //solid flag
+ albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11;
+ albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6;
+ albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1;
+
+ imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16));
+
+ uint facing_bits = 0;
+ const vec3 aniso_dir[6] = vec3[](
+ vec3(1, 0, 0),
+ vec3(0, 1, 0),
+ vec3(0, 0, 1),
+ vec3(-1, 0, 0),
+ vec3(0, -1, 0),
+ vec3(0, 0, -1));
+
+ vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp);
+
+ float closest_dist = -1e20;
+
+ for (uint i = 0; i < 6; i++) {
+ float d = dot(cam_normal, aniso_dir[i]);
+ if (d > closest_dist) {
+ closest_dist = d;
+ facing_bits = (1 << i);
+ }
}
- }
- imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits
+ imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits
- if (length(emission) > 0.001) {
- float lumas[6];
- vec3 light_total = vec3(0);
+ if (length(emission) > 0.001) {
+ float lumas[6];
+ vec3 light_total = vec3(0);
- for (int i = 0; i < 6; i++) {
- float strength = max(0.0, dot(cam_normal, aniso_dir[i]));
- vec3 light = emission * strength;
- light_total += light;
- lumas[i] = max(light.r, max(light.g, light.b));
- }
+ for (int i = 0; i < 6; i++) {
+ float strength = max(0.0, dot(cam_normal, aniso_dir[i]));
+ vec3 light = emission * strength;
+ light_total += light;
+ lumas[i] = max(light.r, max(light.g, light.b));
+ }
- float luma_total = max(light_total.r, max(light_total.g, light_total.b));
+ float luma_total = max(light_total.r, max(light_total.g, light_total.b));
- uint light_aniso = 0;
+ uint light_aniso = 0;
- for (int i = 0; i < 6; i++) {
- light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5);
- }
+ for (int i = 0; i < 6; i++) {
+ light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5);
+ }
- //compress to RGBE9995 to save space
+ //compress to RGBE9995 to save space
- const float pow2to9 = 512.0f;
- const float B = 15.0f;
- const float N = 9.0f;
- const float LN2 = 0.6931471805599453094172321215;
+ const float pow2to9 = 512.0f;
+ const float B = 15.0f;
+ const float N = 9.0f;
+ const float LN2 = 0.6931471805599453094172321215;
- float cRed = clamp(light_total.r, 0.0, 65408.0);
- float cGreen = clamp(light_total.g, 0.0, 65408.0);
- float cBlue = clamp(light_total.b, 0.0, 65408.0);
+ float cRed = clamp(light_total.r, 0.0, 65408.0);
+ float cGreen = clamp(light_total.g, 0.0, 65408.0);
+ float cBlue = clamp(light_total.b, 0.0, 65408.0);
- float cMax = max(cRed, max(cGreen, cBlue));
+ float cMax = max(cRed, max(cGreen, cBlue));
- float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B;
+ float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B;
- float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f);
+ float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f);
- float exps = expp + 1.0f;
+ float exps = expp + 1.0f;
- if (0.0 <= sMax && sMax < pow2to9) {
- exps = expp;
- }
+ if (0.0 <= sMax && sMax < pow2to9) {
+ exps = expp;
+ }
- float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
- float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
- float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
- //store as 8985 to have 2 extra neighbour bits
- uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25);
+ float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
+ float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
+ float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
+ //store as 8985 to have 2 extra neighbour bits
+ uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25);
- imageStore(emission_grid, grid_pos, uvec4(light_rgbe));
- imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso));
+ imageStore(emission_grid, grid_pos, uvec4(light_rgbe));
+ imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso));
+ }
}
- }
#endif
#ifdef MODE_RENDER_MATERIAL
- albedo_output_buffer.rgb = albedo;
- albedo_output_buffer.a = alpha;
+ albedo_output_buffer.rgb = albedo;
+ albedo_output_buffer.a = alpha;
- normal_output_buffer.rgb = normal * 0.5 + 0.5;
- normal_output_buffer.a = 0.0;
- depth_output_buffer.r = -vertex.z;
+ normal_output_buffer.rgb = normal * 0.5 + 0.5;
+ normal_output_buffer.a = 0.0;
+ depth_output_buffer.r = -vertex.z;
-#if defined(AO_USED)
- orm_output_buffer.r = ao;
-#else
- orm_output_buffer.r = 0.0;
-#endif
- orm_output_buffer.g = roughness;
- orm_output_buffer.b = metallic;
- orm_output_buffer.a = sss_strength;
+ orm_output_buffer.r = ao;
+ orm_output_buffer.g = roughness;
+ orm_output_buffer.b = metallic;
+ orm_output_buffer.a = sss_strength;
- emission_output_buffer.rgb = emission;
- emission_output_buffer.a = 0.0;
+ emission_output_buffer.rgb = emission;
+ emission_output_buffer.a = 0.0;
#endif
#ifdef MODE_RENDER_NORMAL_ROUGHNESS
- normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness);
+ normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness);
#ifdef MODE_RENDER_GIPROBE
- if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes
- uint index1 = instances.data[instance_index].gi_offset & 0xFFFF;
- uint index2 = instances.data[instance_index].gi_offset >> 16;
- giprobe_buffer.x = index1 & 0xFF;
- giprobe_buffer.y = index2 & 0xFF;
- } else {
- giprobe_buffer.x = 0xFF;
- giprobe_buffer.y = 0xFF;
- }
+ if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes
+ uint index1 = draw_call.gi_offset & 0xFFFF;
+ uint index2 = draw_call.gi_offset >> 16;
+ giprobe_buffer.x = index1 & 0xFF;
+ giprobe_buffer.y = index2 & 0xFF;
+ } else {
+ giprobe_buffer.x = 0xFF;
+ giprobe_buffer.y = 0xFF;
+ }
#endif
-#endif //MODE_RENDER_NORMAL
+#endif //MODE_RENDER_NORMAL_ROUGHNESS
//nothing happens, so a tree-ssa optimizer will result in no fragment shader :)
#else
- specular_light *= scene_data.reflection_multiplier;
- ambient_light *= albedo; //ambient must be multiplied by albedo at the end
-
-//ambient occlusion
-#if defined(AO_USED)
-
-#ifndef LOW_END_MODE
- if (scene_data.ssao_enabled && scene_data.ssao_ao_affect > 0.0) {
- float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r;
- ao = mix(ao, min(ao, ssao), scene_data.ssao_ao_affect);
- ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect);
- }
-#endif //LOW_END_MODE
-
- ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao);
- ao_light_affect = mix(1.0, ao, ao_light_affect);
- specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect);
- diffuse_light = mix(scene_data.ao_color.rgb, diffuse_light, ao_light_affect);
-#else
-
-#ifndef LOW_END_MODE
- if (scene_data.ssao_enabled) {
- float ao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r;
- ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao);
- float ao_light_affect = mix(1.0, ao, scene_data.ssao_light_affect);
- specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect);
- diffuse_light = mix(scene_data.ao_color.rgb, diffuse_light, ao_light_affect);
- }
-#endif //LOW_END_MODE
+ // multiply by albedo
+ diffuse_light *= albedo; // ambient must be multiplied by albedo at the end
-#endif // AO_USED
+ // apply direct light AO
+ ao = unpackUnorm4x8(orms).x;
+ specular_light *= ao;
+ diffuse_light *= ao;
- // base color remapping
- diffuse_light *= 1.0 - metallic; // TODO: avoid all diffuse and ambient light calculations when metallic == 1 up to this point
+ // apply metallic
+ metallic = unpackUnorm4x8(orms).z;
+ diffuse_light *= 1.0 - metallic;
ambient_light *= 1.0 - metallic;
+ //restore fog
+ fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba));
+
#ifdef MODE_MULTIPLE_RENDER_TARGETS
#ifdef MODE_UNSHADED
@@ -2854,25 +3261,8 @@ FRAGMENT_SHADER_CODE
specular_buffer = vec4(specular_light, metallic);
#endif
- // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky.
- if (scene_data.fog_enabled) {
- vec4 fog = fog_process(vertex);
- diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a);
- specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a);
- }
-
-#ifndef LOW_END_MODE
- if (scene_data.volumetric_fog_enabled) {
- vec4 fog = volumetric_fog_process(screen_uv, -vertex.z);
- diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a);
- specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a);
- }
-#endif // LOW_END_MODE
-
-#if defined(CUSTOM_FOG_USED)
- diffuse_buffer.rgb = mix(diffuse_buffer.rgb, custom_fog.rgb, custom_fog.a);
- specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), custom_fog.a);
-#endif //CUSTOM_FOG_USED
+ diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a);
+ specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a);
#else //MODE_MULTIPLE_RENDER_TARGETS
@@ -2884,22 +3274,9 @@ FRAGMENT_SHADER_CODE
#endif //USE_NO_SHADING
// Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky.
- if (scene_data.fog_enabled) {
- vec4 fog = fog_process(vertex);
- frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a);
- }
-#ifndef LOW_END_MODE
- if (scene_data.volumetric_fog_enabled) {
- vec4 fog = volumetric_fog_process(screen_uv, -vertex.z);
- frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a);
- }
-#endif
-
-#if defined(CUSTOM_FOG_USED)
- frag_color.rgb = mix(frag_color.rgb, custom_fog.rgb, custom_fog.a);
-#endif //CUSTOM_FOG_USED
+ frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a);
#endif //MODE_MULTIPLE_RENDER_TARGETS
#endif //MODE_RENDER_DEPTH
-}
+ }
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl
index d18581c1b3..e9b79e1560 100644
--- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl
@@ -3,12 +3,30 @@
#define MAX_GI_PROBES 8
+#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic)
+
+#extension GL_KHR_shader_subgroup_ballot : enable
+#extension GL_KHR_shader_subgroup_arithmetic : enable
+
+#define USE_SUBGROUPS
+
+#endif
+
#include "cluster_data_inc.glsl"
+#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED)
+#ifndef NORMAL_USED
+#define NORMAL_USED
+#endif
+#endif
+
layout(push_constant, binding = 0, std430) uniform DrawCall {
- uint instance_index;
- uint pad; //16 bits minimum size
- vec2 bake_uv2_offset; //used for bake to uv2, ignored otherwise
+ mat4 transform;
+ uint flags;
+ uint instance_uniforms_ofs; //base offset in global buffer for instance variables
+ uint gi_offset; //GI information when using lightmapping (VCT or lightmap index)
+ uint layer_mask;
+ vec4 lightmap_uv_scale;
}
draw_call;
@@ -43,6 +61,11 @@ layout(set = 0, binding = 3, std140) uniform SceneData {
vec2 viewport_size;
vec2 screen_pixel_size;
+ uint cluster_shift;
+ uint cluster_width;
+ uint cluster_type_size;
+ uint max_cluster_element_count_div_32;
+
//use vec4s because std140 doesnt play nice with vec2s, z and w are wasted
vec4 directional_penumbra_shadow_kernel[32];
vec4 directional_soft_shadow_kernel[32];
@@ -128,26 +151,17 @@ scene_data;
#define INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK 0x7
#define INSTANCE_FLAGS_SKELETON (1 << 19)
+#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20)
-struct InstanceData {
- mat4 transform;
- mat4 normal_transform;
- uint flags;
- uint instance_uniforms_ofs; //base offset in global buffer for instance variables
- uint gi_offset; //GI information when using lightmapping (VCT or lightmap index)
- uint layer_mask;
- vec4 lightmap_uv_scale;
-};
-
-layout(set = 0, binding = 4, std430) restrict readonly buffer Instances {
- InstanceData data[];
+layout(set = 0, binding = 4, std430) restrict readonly buffer OmniLights {
+ LightData data[];
}
-instances;
+omni_lights;
-layout(set = 0, binding = 5, std430) restrict readonly buffer Lights {
+layout(set = 0, binding = 5, std430) restrict readonly buffer SpotLights {
LightData data[];
}
-lights;
+spot_lights;
layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData {
ReflectionData data[];
@@ -166,40 +180,29 @@ struct Lightmap {
mat3 normal_xform;
};
-layout(set = 0, binding = 10, std140) restrict readonly buffer Lightmaps {
+layout(set = 0, binding = 8, std140) restrict readonly buffer Lightmaps {
Lightmap data[];
}
lightmaps;
-layout(set = 0, binding = 11) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES];
-
struct LightmapCapture {
vec4 sh[9];
};
-layout(set = 0, binding = 12, std140) restrict readonly buffer LightmapCaptures {
+layout(set = 0, binding = 9, std140) restrict readonly buffer LightmapCaptures {
LightmapCapture data[];
}
lightmap_captures;
-layout(set = 0, binding = 13) uniform texture2D decal_atlas;
-layout(set = 0, binding = 14) uniform texture2D decal_atlas_srgb;
+layout(set = 0, binding = 10) uniform texture2D decal_atlas;
+layout(set = 0, binding = 11) uniform texture2D decal_atlas_srgb;
-layout(set = 0, binding = 15, std430) restrict readonly buffer Decals {
+layout(set = 0, binding = 12, std430) restrict readonly buffer Decals {
DecalData data[];
}
decals;
-layout(set = 0, binding = 16) uniform utexture3D cluster_texture;
-
-layout(set = 0, binding = 17, std430) restrict readonly buffer ClusterData {
- uint indices[];
-}
-cluster_data;
-
-layout(set = 0, binding = 18) uniform texture2D directional_shadow_atlas;
-
-layout(set = 0, binding = 19, std430) restrict readonly buffer GlobalVariableData {
+layout(set = 0, binding = 13, std430) restrict readonly buffer GlobalVariableData {
vec4 data[];
}
global_variables;
@@ -213,7 +216,7 @@ struct SDFGIProbeCascadeData {
float to_cell; // 1/bounds * grid_size
};
-layout(set = 0, binding = 20, std140) uniform SDFGI {
+layout(set = 0, binding = 14, std140) uniform SDFGI {
vec3 grid_size;
uint max_cascades;
@@ -263,18 +266,27 @@ layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas;
layout(set = 1, binding = 2) uniform texture2D shadow_atlas;
+layout(set = 1, binding = 3) uniform texture2D directional_shadow_atlas;
+
+layout(set = 1, binding = 4) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES];
+
#ifndef LOW_END_MODE
-layout(set = 1, binding = 3) uniform texture3D gi_probe_textures[MAX_GI_PROBES];
+layout(set = 1, binding = 5) uniform texture3D gi_probe_textures[MAX_GI_PROBES];
#endif
+layout(set = 1, binding = 6, std430) buffer restrict readonly ClusterBuffer {
+ uint data[];
+}
+cluster_buffer;
+
/* Set 3, Render Buffers */
#ifdef MODE_RENDER_SDF
-layout(r16ui, set = 1, binding = 4) uniform restrict writeonly uimage3D albedo_volume_grid;
-layout(r32ui, set = 1, binding = 5) uniform restrict writeonly uimage3D emission_grid;
-layout(r32ui, set = 1, binding = 6) uniform restrict writeonly uimage3D emission_aniso_grid;
-layout(r32ui, set = 1, binding = 7) uniform restrict uimage3D geom_facing_grid;
+layout(r16ui, set = 1, binding = 7) uniform restrict writeonly uimage3D albedo_volume_grid;
+layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_grid;
+layout(r32ui, set = 1, binding = 9) uniform restrict writeonly uimage3D emission_aniso_grid;
+layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid;
//still need to be present for shaders that use it, so remap them to something
#define depth_buffer shadow_atlas
@@ -283,17 +295,17 @@ layout(r32ui, set = 1, binding = 7) uniform restrict uimage3D geom_facing_grid;
#else
-layout(set = 1, binding = 4) uniform texture2D depth_buffer;
-layout(set = 1, binding = 5) uniform texture2D color_buffer;
+layout(set = 1, binding = 7) uniform texture2D depth_buffer;
+layout(set = 1, binding = 8) uniform texture2D color_buffer;
#ifndef LOW_END_MODE
-layout(set = 1, binding = 6) uniform texture2D normal_roughness_buffer;
-layout(set = 1, binding = 7) uniform texture2D ao_buffer;
-layout(set = 1, binding = 8) uniform texture2D ambient_buffer;
-layout(set = 1, binding = 9) uniform texture2D reflection_buffer;
-layout(set = 1, binding = 10) uniform texture2DArray sdfgi_lightprobe_texture;
-layout(set = 1, binding = 11) uniform texture3D sdfgi_occlusion_cascades;
+layout(set = 1, binding = 9) uniform texture2D normal_roughness_buffer;
+layout(set = 1, binding = 10) uniform texture2D ao_buffer;
+layout(set = 1, binding = 11) uniform texture2D ambient_buffer;
+layout(set = 1, binding = 12) uniform texture2D reflection_buffer;
+layout(set = 1, binding = 13) uniform texture2DArray sdfgi_lightprobe_texture;
+layout(set = 1, binding = 14) uniform texture3D sdfgi_occlusion_cascades;
struct GIProbeData {
mat4 xform;
@@ -311,12 +323,12 @@ struct GIProbeData {
uint mipmaps;
};
-layout(set = 1, binding = 12, std140) uniform GIProbes {
+layout(set = 1, binding = 15, std140) uniform GIProbes {
GIProbeData data[MAX_GI_PROBES];
}
gi_probes;
-layout(set = 1, binding = 13) uniform texture3D volumetric_fog_texture;
+layout(set = 1, binding = 16) uniform texture3D volumetric_fog_texture;
#endif // LOW_END_MODE
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl
index 813ea29fa1..e4c3f3a84b 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl
@@ -97,6 +97,8 @@ void main() {
float blend = 0.0;
#if 1
+ // No interpolation
+
vec3 inv_dir = 1.0 / ray_dir;
float rough = 0.5;
@@ -161,114 +163,11 @@ void main() {
hit_light *= (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0)));
- if (blend > 0.0) {
- light = mix(light, hit_light, blend);
- blend = 0.0;
- } else {
- light = hit_light;
-
- //process blend
- float blend_from = (float(params.probe_axis_size - 1) / 2.0) - 2.5;
- float blend_to = blend_from + 2.0;
-
- vec3 cam_pos = params.cam_transform[3].xyz - cascades.data[i].offset;
- cam_pos *= cascades.data[i].to_cell;
-
- pos += ray_dir * min(advance, max_advance);
- vec3 inner_pos = pos - cam_pos;
-
- inner_pos = inner_pos * float(params.probe_axis_size - 1) / params.grid_size.x;
-
- float len = length(inner_pos);
-
- inner_pos = abs(normalize(inner_pos));
- len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z));
-
- if (len >= blend_from) {
- blend = smoothstep(blend_from, blend_to, len);
-
- pos /= cascades.data[i].to_cell;
- pos += cascades.data[i].offset;
- ray_pos = pos;
- hit = false; //continue trace for blend
-
- continue;
- }
- }
+ light = hit_light;
break;
}
- light = mix(light, vec3(0.0), blend);
-
-#else
-
- vec3 inv_dir = 1.0 / ray_dir;
-
- bool hit = false;
- vec4 light_accum = vec4(0.0);
-
- float blend_size = (params.grid_size.x / float(params.probe_axis_size - 1)) * 0.5;
-
- float radius_sizes[MAX_CASCADES];
- for (uint i = 0; i < params.max_cascades; i++) {
- radius_sizes[i] = (1.0 / cascades.data[i].to_cell) * (params.grid_size.x * 0.5 - blend_size);
- }
-
- float max_distance = radius_sizes[params.max_cascades - 1];
- float advance = 0;
- while (advance < max_distance) {
- for (uint i = 0; i < params.max_cascades; i++) {
- if (advance < radius_sizes[i]) {
- vec3 pos = (ray_pos + ray_dir * advance) - cascades.data[i].offset;
- pos *= cascades.data[i].to_cell * pos_to_uvw;
-
- float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), pos).r * 255.0 - 1.0;
-
- vec4 hit_light = vec4(0.0);
- if (distance < 1.0) {
- hit_light.a = max(0.0, 1.0 - distance);
- hit_light.rgb = texture(sampler3D(light_cascades[i], linear_sampler), pos).rgb;
- hit_light.rgb *= hit_light.a;
- }
-
- distance /= cascades.data[i].to_cell;
-
- if (i < (params.max_cascades - 1)) {
- pos = (ray_pos + ray_dir * advance) - cascades.data[i + 1].offset;
- pos *= cascades.data[i + 1].to_cell * pos_to_uvw;
-
- float distance2 = texture(sampler3D(sdf_cascades[i + 1], linear_sampler), pos).r * 255.0 - 1.0;
-
- vec4 hit_light2 = vec4(0.0);
- if (distance2 < 1.0) {
- hit_light2.a = max(0.0, 1.0 - distance2);
- hit_light2.rgb = texture(sampler3D(light_cascades[i + 1], linear_sampler), pos).rgb;
- hit_light2.rgb *= hit_light2.a;
- }
-
- float prev_radius = i == 0 ? 0.0 : radius_sizes[i - 1];
- float blend = (advance - prev_radius) / (radius_sizes[i] - prev_radius);
-
- distance2 /= cascades.data[i + 1].to_cell;
-
- hit_light = mix(hit_light, hit_light2, blend);
- distance = mix(distance, distance2, blend);
- }
-
- light_accum += hit_light;
- advance += distance;
- break;
- }
- }
-
- if (light_accum.a > 0.98) {
- break;
- }
- }
-
- light = light_accum.rgb / light_accum.a;
-
#endif
imageStore(screen_buffer, screen_pos, vec4(linear_to_srgb(light), 1.0));
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
index 61e4bf5e18..bcdfe8cc85 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
@@ -112,11 +112,23 @@ vec2 octahedron_encode(vec3 n) {
return n.xy;
}
+float get_omni_attenuation(float distance, float inv_range, float decay) {
+ float nd = distance * inv_range;
+ nd *= nd;
+ nd *= nd; // nd^4
+ nd = max(1.0 - nd, 0.0);
+ nd *= nd; // nd^2
+ return nd * pow(max(distance, 0.0001), -decay);
+}
+
void main() {
uint voxel_index = uint(gl_GlobalInvocationID.x);
//used for skipping voxels every N frames
- voxel_index = params.process_offset + voxel_index * params.process_increment;
+ if (params.process_increment > 1) {
+ voxel_index *= params.process_increment;
+ voxel_index += params.process_offset;
+ }
if (voxel_index >= dispatch_data.total_count) {
return;
@@ -134,10 +146,78 @@ void main() {
uint voxel_albedo = process_voxels.data[voxel_index].albedo;
vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F);
- vec3 light_accum[6];
-
+ vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0));
uint valid_aniso = (voxel_albedo >> 15) & 0x3F;
+ const vec3 aniso_dir[6] = vec3[](
+ vec3(1, 0, 0),
+ vec3(0, 1, 0),
+ vec3(0, 0, 1),
+ vec3(-1, 0, 0),
+ vec3(0, -1, 0),
+ vec3(0, 0, -1));
+
+ // Add indirect light first, in order to save computation resources
+#ifdef MODE_PROCESS_DYNAMIC
+ if (params.multibounce) {
+ vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size;
+ ivec3 probe_base_pos = ivec3(pos);
+
+ float weight_accum[6] = float[](0, 0, 0, 0, 0, 0);
+
+ ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade));
+ tex_pos.x += probe_base_pos.z * int(params.probe_axis_size);
+
+ tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1);
+
+ vec3 base_tex_posf = vec3(tex_pos);
+ vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size));
+ vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx;
+
+ for (uint j = 0; j < 8; j++) {
+ ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1);
+ ivec3 probe_posi = probe_base_pos;
+ probe_posi += offset;
+
+ // Compute weight
+
+ vec3 probe_pos = vec3(probe_posi);
+ vec3 probe_to_pos = pos - probe_pos;
+ vec3 probe_dir = normalize(-probe_to_pos);
+
+ // Compute lightprobe texture position
+
+ vec3 trilinear = vec3(1.0) - abs(probe_to_pos);
+
+ for (uint k = 0; k < 6; k++) {
+ if (bool(valid_aniso & (1 << k))) {
+ vec3 n = aniso_dir[k];
+ float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir));
+
+ vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0);
+ tex_posf.xy *= tex_pixel_size;
+
+ vec3 pos_uvw = tex_posf;
+ pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy;
+ pos_uvw.x += float(offset.z) * probe_uv_offset.z;
+ vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb;
+
+ light_accum[k] += indirect_light * weight;
+ weight_accum[k] += weight;
+ }
+ }
+ }
+
+ for (uint k = 0; k < 6; k++) {
+ if (weight_accum[k] > 0.0) {
+ light_accum[k] /= weight_accum[k];
+ light_accum[k] *= albedo;
+ }
+ }
+ }
+
+#endif
+
{
uint rgbe = process_voxels.data[voxel_index].light;
@@ -153,18 +233,10 @@ void main() {
uint aniso = process_voxels.data[voxel_index].light_aniso;
for (uint i = 0; i < 6; i++) {
float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F);
- light_accum[i] = l * strength;
+ light_accum[i] += l * strength;
}
}
- const vec3 aniso_dir[6] = vec3[](
- vec3(1, 0, 0),
- vec3(0, 1, 0),
- vec3(0, 0, 1),
- vec3(-1, 0, 0),
- vec3(0, -1, 0),
- vec3(0, 0, -1));
-
// Raytrace light
vec3 pos_to_uvw = 1.0 / params.grid_size;
@@ -184,14 +256,15 @@ void main() {
direction = normalize(rel_vec);
light_distance = length(rel_vec);
rel_vec.y /= params.y_mult;
- attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation);
+ attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation);
+
} break;
case LIGHT_TYPE_SPOT: {
vec3 rel_vec = lights.data[i].position - position;
direction = normalize(rel_vec);
light_distance = length(rel_vec);
rel_vec.y /= params.y_mult;
- attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation);
+ attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation);
float angle = acos(dot(normalize(rel_vec), -lights.data[i].direction));
if (angle > lights.data[i].spot_angle) {
@@ -282,65 +355,6 @@ void main() {
}
}
- // Add indirect light
-
- if (params.multibounce) {
- vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size;
- ivec3 probe_base_pos = ivec3(pos);
-
- vec4 probe_accum[6] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
- float weight_accum[6] = float[](0, 0, 0, 0, 0, 0);
-
- ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade));
- tex_pos.x += probe_base_pos.z * int(params.probe_axis_size);
-
- tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1);
-
- vec3 base_tex_posf = vec3(tex_pos);
- vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size));
- vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx;
-
- for (uint j = 0; j < 8; j++) {
- ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1);
- ivec3 probe_posi = probe_base_pos;
- probe_posi += offset;
-
- // Compute weight
-
- vec3 probe_pos = vec3(probe_posi);
- vec3 probe_to_pos = pos - probe_pos;
- vec3 probe_dir = normalize(-probe_to_pos);
-
- // Compute lightprobe texture position
-
- vec3 trilinear = vec3(1.0) - abs(probe_to_pos);
-
- for (uint k = 0; k < 6; k++) {
- if (bool(valid_aniso & (1 << k))) {
- vec3 n = aniso_dir[k];
- float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir));
-
- vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0);
- tex_posf.xy *= tex_pixel_size;
-
- vec3 pos_uvw = tex_posf;
- pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy;
- pos_uvw.x += float(offset.z) * probe_uv_offset.z;
- vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0);
-
- probe_accum[k] += indirect_light * weight;
- weight_accum[k] += weight;
- }
- }
- }
-
- for (uint k = 0; k < 6; k++) {
- if (weight_accum[k] > 0.0) {
- light_accum[k] += probe_accum[k].rgb * albedo / weight_accum[k];
- }
- }
- }
-
// Store the light in the light texture
float lumas[6];
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl
index eec0a90c0d..69d8824d8a 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl
@@ -14,7 +14,7 @@ layout(local_size_x = OCT_RES, local_size_y = OCT_RES, local_size_z = 1) in;
layout(rgba16f, set = 0, binding = 1) uniform restrict image2DArray irradiance_texture;
layout(rg16f, set = 0, binding = 2) uniform restrict image2DArray depth_texture;
-ayout(rgba32ui, set = 0, binding = 3) uniform restrict uimage2DArray irradiance_history_texture;
+layout(rgba32ui, set = 0, binding = 3) uniform restrict uimage2DArray irradiance_history_texture;
layout(rg32ui, set = 0, binding = 4) uniform restrict uimage2DArray depth_history_texture;
struct CascadeData {
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
index d516ab22c3..d122e7a38a 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
@@ -136,12 +136,24 @@ uint rgbe_encode(vec3 color) {
return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27);
}
+struct SH {
+#if (SH_SIZE == 16)
+ float c[48];
+#else
+ float c[28];
+#endif
+};
+
+shared SH sh_accum[64]; //8x8
+
void main() {
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing
return;
}
+ uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8;
+
#ifdef MODE_PROCESS
float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell;
@@ -154,27 +166,9 @@ void main() {
vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size;
vec3 pos_to_uvw = 1.0 / params.grid_size;
- vec4 probe_sh_accum[SH_SIZE] = vec4[](
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0)
-#if (SH_SIZE == 16)
- ,
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0),
- vec4(0.0)
-#endif
- );
+ for (uint i = 0; i < SH_SIZE * 3; i++) {
+ sh_accum[probe_index].c[i] = 0.0;
+ }
// quickly ensure each probe has a different "offset" for the vogel function, based on integer world position
uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell));
@@ -195,14 +189,12 @@ void main() {
vec3 inv_dir = 1.0 / ray_dir;
bool hit = false;
- vec3 hit_normal;
- vec3 hit_light;
- vec3 hit_aniso0;
- vec3 hit_aniso1;
+ uint hit_cascade;
float bias = params.ray_bias;
vec3 abs_ray_dir = abs(ray_dir);
ray_pos += ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) * bias / cascades.data[params.cascade].to_cell;
+ vec3 uvw;
for (uint j = params.cascade; j < params.max_cascades; j++) {
//convert to local bounds
@@ -221,14 +213,12 @@ void main() {
float advance = 0.0;
- vec3 uvw;
-
while (advance < max_advance) {
//read how much to advance from SDF
uvw = (pos + ray_dir * advance) * pos_to_uvw;
float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0;
- if (distance < 0.001) {
+ if (distance < 0.05) {
//consider hit
hit = true;
break;
@@ -238,17 +228,7 @@ void main() {
}
if (hit) {
- const float EPSILON = 0.001;
- hit_normal = normalize(vec3(
- texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r,
- texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r,
- texture(sampler3D(sdf_cascades[j], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[j], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r));
-
- hit_light = texture(sampler3D(light_cascades[j], linear_sampler), uvw).rgb;
- vec4 aniso0 = texture(sampler3D(aniso0_cascades[j], linear_sampler), uvw);
- hit_aniso0 = aniso0.rgb;
- hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[j], linear_sampler), uvw).rg);
-
+ hit_cascade = j;
break;
}
@@ -261,6 +241,17 @@ void main() {
vec4 light;
if (hit) {
+ const float EPSILON = 0.001;
+ vec3 hit_normal = normalize(vec3(
+ texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r,
+ texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r,
+ texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r));
+
+ vec3 hit_light = texture(sampler3D(light_cascades[hit_cascade], linear_sampler), uvw).rgb;
+ vec4 aniso0 = texture(sampler3D(aniso0_cascades[hit_cascade], linear_sampler), uvw);
+ vec3 hit_aniso0 = aniso0.rgb;
+ vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[hit_cascade], linear_sampler), uvw).rg);
+
//one liner magic
light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0)));
light.a = 1.0;
@@ -278,33 +269,33 @@ void main() {
}
vec3 ray_dir2 = ray_dir * ray_dir;
- float c[SH_SIZE] = float[](
-
- 0.282095, //l0
- 0.488603 * ray_dir.y, //l1n1
- 0.488603 * ray_dir.z, //l1n0
- 0.488603 * ray_dir.x, //l1p1
- 1.092548 * ray_dir.x * ray_dir.y, //l2n2
- 1.092548 * ray_dir.y * ray_dir.z, //l2n1
- 0.315392 * (3.0 * ray_dir2.z - 1.0), //l20
- 1.092548 * ray_dir.x * ray_dir.z, //l2p1
- 0.546274 * (ray_dir2.x - ray_dir2.y) //l2p2
+
+#define SH_ACCUM(m_idx, m_value) \
+ { \
+ vec3 l = light.rgb * (m_value); \
+ sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \
+ sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \
+ sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \
+ }
+ SH_ACCUM(0, 0.282095); //l0
+ SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1
+ SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0
+ SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1
+ SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2
+ SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1
+ SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20
+ SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1
+ SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2
#if (SH_SIZE == 16)
- ,
- 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y),
- 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z,
- 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z),
- 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z),
- 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z),
- 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z,
- 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)
+ SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y));
+ SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z);
+ SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z));
+ SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z));
+ SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z));
+ SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z);
+ SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y));
#endif
- );
-
- for (uint j = 0; j < SH_SIZE; j++) {
- probe_sh_accum[j] += light * c[j];
- }
}
for (uint i = 0; i < SH_SIZE; i++) {
@@ -312,7 +303,7 @@ void main() {
ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index));
ivec2 average_pos = prev_pos.xy;
- vec4 value = probe_sh_accum[i] * 4.0 / float(params.ray_count);
+ vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count);
ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average
@@ -344,37 +335,11 @@ void main() {
ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1);
ivec2 local_pos = pos % OCT_SIZE;
- //fill the spherical harmonic
- vec4 sh[SH_SIZE];
-
- for (uint i = 0; i < SH_SIZE; i++) {
- // store in history texture
- ivec2 average_pos = sh_pos + ivec2(0, i);
- ivec4 average = imageLoad(lightprobe_average_texture, average_pos);
-
- sh[i] = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS);
- }
-
//compute the octahedral normal for this texel
vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE));
- /*
+
// read the spherical harmonic
- const float c1 = 0.429043;
- const float c2 = 0.511664;
- const float c3 = 0.743125;
- const float c4 = 0.886227;
- const float c5 = 0.247708;
- vec4 light = (c1 * sh[8] * (normal.x * normal.x - normal.y * normal.y) +
- c3 * sh[6] * normal.z * normal.z +
- c4 * sh[0] -
- c5 * sh[6] +
- 2.0 * c1 * sh[4] * normal.x * normal.y +
- 2.0 * c1 * sh[7] * normal.x * normal.z +
- 2.0 * c1 * sh[5] * normal.y * normal.z +
- 2.0 * c2 * sh[3] * normal.x +
- 2.0 * c2 * sh[1] * normal.y +
- 2.0 * c2 * sh[2] * normal.z);
-*/
+
vec3 normal2 = normal * normal;
float c[SH_SIZE] = float[](
@@ -426,7 +391,14 @@ void main() {
vec3 radiance = vec3(0.0);
for (uint i = 0; i < SH_SIZE; i++) {
- vec3 m = sh[i].rgb * c[i] * 4.0;
+ // store in history texture
+ ivec2 average_pos = sh_pos + ivec2(0, i);
+ ivec4 average = imageLoad(lightprobe_average_texture, average_pos);
+
+ vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS);
+
+ vec3 m = sh.rgb * c[i] * 4.0;
+
irradiance += m * l_mult[i];
radiance += m;
}
@@ -515,13 +487,15 @@ void main() {
//can't scroll, must look for position in parent cascade
//to global coords
- float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell;
+ float cell_to_probe = float(params.grid_size.x / float(params.probe_axis_size - 1));
+
+ float probe_cell_size = cell_to_probe / cascades.data[params.cascade].to_cell;
vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size;
//to parent local coords
+ float probe_cell_size_next = cell_to_probe / cascades.data[params.cascade + 1].to_cell;
probe_pos -= cascades.data[params.cascade + 1].offset;
- probe_pos *= cascades.data[params.cascade + 1].to_cell;
- probe_pos = probe_pos * float(params.probe_axis_size - 1) / float(params.grid_size.x);
+ probe_pos /= probe_cell_size_next;
ivec3 probe_posi = ivec3(probe_pos);
//add up all light, no need to use occlusion here, since occlusion will do its work afterwards
@@ -574,20 +548,28 @@ void main() {
}
} else {
- // clear and let it re-raytrace, only for the last cascade, which happens very un-often
- //scroll
+ //scroll at the edge of the highest cascade, just copy what is there,
+ //since its the closest we have anyway
+
for (uint j = 0; j < params.history_size; j++) {
+ ivec2 tex_pos;
+ tex_pos = probe_cell.xy;
+ tex_pos.x += probe_cell.z * int(params.probe_axis_size);
+
for (int i = 0; i < SH_SIZE; i++) {
// copy from history texture
+ ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j));
ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j));
- imageStore(lightprobe_history_scroll_texture, dst_pos, ivec4(0));
+ ivec4 value = imageLoad(lightprobe_history_texture, dst_pos);
+ imageStore(lightprobe_history_scroll_texture, dst_pos, value);
}
}
for (int i = 0; i < SH_SIZE; i++) {
// copy from average texture
- ivec2 dst_pos = ivec2(pos.x, pos.y * SH_SIZE + i);
- imageStore(lightprobe_average_scroll_texture, dst_pos, ivec4(0));
+ ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i);
+ ivec4 average = imageLoad(lightprobe_average_texture, spos);
+ imageStore(lightprobe_average_scroll_texture, spos, average);
}
}
diff --git a/servers/rendering/renderer_rd/shaders/skeleton.glsl b/servers/rendering/renderer_rd/shaders/skeleton.glsl
new file mode 100644
index 0000000000..b19f5a9ad3
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/skeleton.glsl
@@ -0,0 +1,199 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 1, std430) buffer restrict writeonly DstVertexData {
+ uint data[];
+}
+dst_vertices;
+
+layout(set = 0, binding = 2, std430) buffer restrict readonly BlendShapeWeights {
+ float data[];
+}
+blend_shape_weights;
+
+layout(set = 1, binding = 0, std430) buffer restrict readonly SrcVertexData {
+ uint data[];
+}
+src_vertices;
+
+layout(set = 1, binding = 1, std430) buffer restrict readonly BoneWeightData {
+ uint data[];
+}
+src_bone_weights;
+
+layout(set = 1, binding = 2, std430) buffer restrict readonly BlendShapeData {
+ uint data[];
+}
+src_blend_shapes;
+
+layout(set = 2, binding = 0, std430) buffer restrict readonly SkeletonData {
+ vec4 data[];
+}
+bone_transforms;
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ bool has_normal;
+ bool has_tangent;
+ bool has_skeleton;
+ bool has_blend_shape;
+
+ uint vertex_count;
+ uint vertex_stride;
+ uint skin_stride;
+ uint skin_weight_offset;
+
+ uint blend_shape_count;
+ bool normalized_blend_shapes;
+ uint pad0;
+ uint pad1;
+}
+params;
+
+vec4 decode_abgr_2_10_10_10(uint base) {
+ uvec4 abgr_2_10_10_10 = (uvec4(base) >> uvec4(0, 10, 20, 30)) & uvec4(0x3FF, 0x3FF, 0x3FF, 0x3);
+ return vec4(abgr_2_10_10_10) / vec4(1023.0, 1023.0, 1023.0, 3.0) * 2.0 - 1.0;
+}
+
+uint encode_abgr_2_10_10_10(vec4 base) {
+ uvec4 abgr_2_10_10_10 = uvec4(clamp(ivec4((base * 0.5 + 0.5) * vec4(1023.0, 1023.0, 1023.0, 3.0)), ivec4(0), ivec4(0x3FF, 0x3FF, 0x3FF, 0x3))) << uvec4(0, 10, 20, 30);
+ return abgr_2_10_10_10.x | abgr_2_10_10_10.y | abgr_2_10_10_10.z | abgr_2_10_10_10.w;
+}
+
+void main() {
+ uint index = gl_GlobalInvocationID.x;
+ if (index >= params.vertex_count) {
+ return;
+ }
+
+ uint src_offset = index * params.vertex_stride;
+
+#ifdef MODE_2D
+ vec2 vertex = uintBitsToFloat(uvec2(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1]));
+#else
+ vec3 vertex;
+ vec3 normal;
+ vec4 tangent;
+
+ vertex = uintBitsToFloat(uvec3(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1], src_vertices.data[src_offset + 2]));
+
+ src_offset += 3;
+
+ if (params.has_normal) {
+ normal = decode_abgr_2_10_10_10(src_vertices.data[src_offset]).rgb;
+ src_offset++;
+ }
+
+ if (params.has_tangent) {
+ tangent = decode_abgr_2_10_10_10(src_vertices.data[src_offset]);
+ }
+
+ if (params.has_blend_shape) {
+ float blend_total = 0.0;
+ vec3 blend_vertex = vec3(0.0);
+ vec3 blend_normal = vec3(0.0);
+ vec3 blend_tangent = vec3(0.0);
+
+ for (uint i = 0; i < params.blend_shape_count; i++) {
+ float w = blend_shape_weights.data[i];
+ if (w > 0.0001) {
+ uint base_offset = (params.vertex_count * i + index) * params.vertex_stride;
+
+ blend_vertex += uintBitsToFloat(uvec3(src_blend_shapes.data[base_offset + 0], src_blend_shapes.data[base_offset + 1], src_blend_shapes.data[base_offset + 2])) * w;
+
+ base_offset += 3;
+
+ if (params.has_normal) {
+ blend_normal += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb * w;
+ base_offset++;
+ }
+
+ if (params.has_tangent) {
+ blend_tangent += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb;
+ }
+
+ blend_total += w;
+ }
+ }
+
+ if (params.normalized_blend_shapes) {
+ vertex = (1.0 - blend_total) * vertex;
+ normal = (1.0 - blend_total) * normal;
+ tangent.rgb = (1.0 - blend_total) * tangent.rgb;
+ }
+
+ vertex += blend_vertex;
+ normal += normalize(normal + blend_normal);
+ tangent.rgb += normalize(tangent.rgb + blend_tangent);
+ }
+
+ if (params.has_skeleton) {
+ uint skin_offset = params.skin_stride * index;
+
+ uvec2 bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+ uvec2 bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset
+ uvec2 bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3;
+
+ skin_offset += params.skin_weight_offset;
+
+ uvec2 weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+
+ vec2 weights_01 = unpackUnorm2x16(weights.x);
+ vec2 weights_23 = unpackUnorm2x16(weights.y);
+
+ mat4 m = mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x;
+ m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y;
+ m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x;
+ m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y;
+
+ if (params.skin_weight_offset == 4) {
+ //using 8 bones/weights
+ skin_offset = params.skin_stride * index + 2;
+
+ bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+ bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset
+ bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3;
+
+ skin_offset += params.skin_weight_offset;
+
+ weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+
+ weights_01 = unpackUnorm2x16(weights.x);
+ weights_23 = unpackUnorm2x16(weights.y);
+
+ m += mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x;
+ m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y;
+ m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x;
+ m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y;
+ }
+
+ //reverse order because its transposed
+ vertex = (vec4(vertex, 1.0) * m).xyz;
+ normal = normalize((vec4(normal, 0.0) * m).xyz);
+ tangent.xyz = normalize((vec4(tangent.xyz, 0.0) * m).xyz);
+ }
+
+ uint dst_offset = index * params.vertex_stride;
+
+ uvec3 uvertex = floatBitsToUint(vertex);
+ dst_vertices.data[dst_offset + 0] = uvertex.x;
+ dst_vertices.data[dst_offset + 1] = uvertex.y;
+ dst_vertices.data[dst_offset + 2] = uvertex.z;
+
+ dst_offset += 3;
+
+ if (params.has_normal) {
+ dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(vec4(normal, 0.0));
+ dst_offset++;
+ }
+
+ if (params.has_tangent) {
+ dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(tangent);
+ }
+
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/ssao.glsl
index 346338181a..231f8f91ec 100644
--- a/servers/rendering/renderer_rd/shaders/ssao.glsl
+++ b/servers/rendering/renderer_rd/shaders/ssao.glsl
@@ -1,249 +1,486 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
#[compute]
#version 450
VERSION_DEFINES
-layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5
+
+#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32)
+const vec4 sample_pattern[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = {
+ vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827),
+ vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283),
+ vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055),
+ vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, -1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520),
+ vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800),
+ vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046),
+ vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), vec4(-0.91974425, 0.05403209, 0.600000, 0.062246),
+ vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244)
+};
+
+// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors
+// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples)
+const int num_taps[5] = { 3, 5, 12, 0, 0 };
+
+#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar
+#define SSAO_TILT_SAMPLES_AMOUNT (0.4)
+//
+#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar
+#define SSAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable)
+//
+#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) // to disable simply set to 99 or similar
+#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges
+//
+#define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use detail; to disable simply set to 99 or similar
+//
+#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too
+#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically
+//
+// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for
+// testing purposes, it will not yield performance gains (or correct results)
+#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1)
+//
+#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1)
+
+#define SSAO_MAX_TAPS 32
+#define SSAO_MAX_REF_TAPS 512
+#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5
+#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS - SSAO_ADAPTIVE_TAP_BASE_COUNT)
+#define SSAO_DEPTH_MIP_LEVELS 4
-#define TWO_PI 6.283185307179586476925286766559
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
-#ifdef SSAO_QUALITY_HIGH
-#define NUM_SAMPLES (20)
-#endif
+layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps;
+layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal;
+layout(set = 0, binding = 2) uniform Constants { //get into a lower set
+ vec4 rotation_matrices[20];
+}
+constants;
-#ifdef SSAO_QUALITY_ULTRA
-#define NUM_SAMPLES (48)
+#ifdef ADAPTIVE
+layout(rg8, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssao;
+layout(set = 1, binding = 1) uniform sampler2D source_importance;
+layout(set = 1, binding = 2, std430) buffer Counter {
+ uint sum;
+}
+counter;
#endif
-#ifdef SSAO_QUALITY_LOW
-#define NUM_SAMPLES (8)
-#endif
+layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image;
-#if !defined(SSAO_QUALITY_LOW) && !defined(SSAO_QUALITY_HIGH) && !defined(SSAO_QUALITY_ULTRA)
-#define NUM_SAMPLES (12)
-#endif
+// This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead
+layout(push_constant, binding = 3, std430) uniform Params {
+ ivec2 screen_size;
+ int pass;
+ int quality;
-// If using depth mip levels, the log of the maximum pixel offset before we need to switch to a lower
-// miplevel to maintain reasonable spatial locality in the cache
-// If this number is too small (< 3), too many taps will land in the same pixel, and we'll get bad variance that manifests as flashing.
-// If it is too high (> 5), we'll get bad performance because we're not using the MIP levels effectively
-#define LOG_MAX_OFFSET (3)
-
-// This must be less than or equal to the MAX_MIP_LEVEL defined in SSAO.cpp
-#define MAX_MIP_LEVEL (4)
-
-// This is the number of turns around the circle that the spiral pattern makes. This should be prime to prevent
-// taps from lining up. This particular choice was tuned for NUM_SAMPLES == 9
-
-const int ROTATIONS[] = int[](
- 1, 1, 2, 3, 2, 5, 2, 3, 2,
- 3, 3, 5, 5, 3, 4, 7, 5, 5, 7,
- 9, 8, 5, 5, 7, 7, 7, 8, 5, 8,
- 11, 12, 7, 10, 13, 8, 11, 8, 7, 14,
- 11, 11, 13, 12, 13, 19, 17, 13, 11, 18,
- 19, 11, 11, 14, 17, 21, 15, 16, 17, 18,
- 13, 17, 11, 17, 19, 18, 25, 18, 19, 19,
- 29, 21, 19, 27, 31, 29, 21, 18, 17, 29,
- 31, 31, 23, 18, 25, 26, 25, 23, 19, 34,
- 19, 27, 21, 25, 39, 29, 17, 21, 27);
-
-//#define NUM_SPIRAL_TURNS (7)
-const int NUM_SPIRAL_TURNS = ROTATIONS[NUM_SAMPLES - 1];
-
-layout(set = 0, binding = 0) uniform sampler2D source_depth_mipmaps;
-layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image;
-
-#ifndef USE_HALF_SIZE
-layout(set = 2, binding = 0) uniform sampler2D source_depth;
-#endif
+ vec2 half_screen_pixel_size;
+ int size_multiplier;
+ float detail_intensity;
-layout(set = 3, binding = 0) uniform sampler2D source_normal;
+ vec2 NDC_to_view_mul;
+ vec2 NDC_to_view_add;
-layout(push_constant, binding = 1, std430) uniform Params {
- ivec2 screen_size;
- float z_far;
- float z_near;
+ vec2 pad2;
+ vec2 half_screen_pixel_size_x025;
- bool orthogonal;
- float intensity_div_r6;
float radius;
- float bias;
-
- vec4 proj_info;
- vec2 pixel_size;
- float proj_scale;
- uint pad;
+ float intensity;
+ float shadow_power;
+ float shadow_clamp;
+
+ float fade_out_mul;
+ float fade_out_add;
+ float horizon_angle_threshold;
+ float inv_radius_near_limit;
+
+ bool is_orthogonal;
+ float neg_inv_radius;
+ float load_counter_avg_div;
+ float adaptive_sample_limit;
+
+ ivec2 pass_coord_offset;
+ vec2 pass_uv_offset;
}
params;
-vec3 reconstructCSPosition(vec2 S, float z) {
- if (params.orthogonal) {
- return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z);
+// packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
+float pack_edges(vec4 p_edgesLRTB) {
+ p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05);
+ return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0));
+}
+
+vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) {
+ if (params.is_orthogonal) {
+ return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth);
} else {
- return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z);
+ return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth);
}
}
-vec3 getPosition(ivec2 ssP) {
- vec3 P;
-#ifdef USE_HALF_SIZE
- P.z = texelFetch(source_depth_mipmaps, ssP, 0).r;
- P.z = -P.z;
-#else
- P.z = texelFetch(source_depth, ssP, 0).r;
+// calculate effect radius and fit our screen sampling pattern inside it
+void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) {
+ r_radius = params.radius;
- P.z = P.z * 2.0 - 1.0;
- if (params.orthogonal) {
- P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
- } else {
- P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near));
- }
- P.z = -P.z;
-#endif
- // Offset to pixel center
- P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z);
- return P;
+ // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts
+ const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2;
+
+ r_radius *= too_close_limit;
+
+ // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence
+ r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x;
+
+ // used to calculate falloff (both for AO samples and per-sample weights)
+ r_fallof_sq = -1.0 / (r_radius * r_radius);
+}
+
+vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float p_bottom_z) {
+ // slope-sensitive depth-based edge detection
+ vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z;
+ vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz;
+ edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted));
+ return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
+}
+
+vec3 decode_normal(vec3 p_encoded_normal) {
+ vec3 normal = p_encoded_normal * 2.0 - 1.0;
+ return normal;
}
-/** Returns a unit vector and a screen-space radius for the tap on a unit disk (the caller should scale by the actual disk radius) */
-vec2 tapLocation(int sampleNumber, float spinAngle, out float ssR) {
- // Radius relative to ssR
- float alpha = (float(sampleNumber) + 0.5) * (1.0 / float(NUM_SAMPLES));
- float angle = alpha * (float(NUM_SPIRAL_TURNS) * 6.28) + spinAngle;
+vec3 load_normal(ivec2 p_pos) {
+ vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz;
+ encoded_normal.z = 1.0 - encoded_normal.z;
+ return decode_normal(encoded_normal);
+}
- ssR = alpha;
- return vec2(cos(angle), sin(angle));
+vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
+ vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz;
+ encoded_normal.z = 1.0 - encoded_normal.z;
+ return decode_normal(encoded_normal);
}
-/** Read the camera-space position of the point at screen-space pixel ssP + unitOffset * ssR. Assumes length(unitOffset) == 1 */
-vec3 getOffsetPosition(ivec2 ssP, float ssR) {
- // Derivation:
- // mipLevel = floor(log(ssR / MAX_OFFSET));
+// all vectors in viewspace
+float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) {
+ float length_sq = dot(p_hit_delta, p_hit_delta);
+ float NdotD = dot(p_pixel_normal, p_hit_delta) / sqrt(length_sq);
- int mipLevel = clamp(int(floor(log2(ssR))) - LOG_MAX_OFFSET, 0, MAX_MIP_LEVEL);
+ float falloff_mult = max(0.0, length_sq * p_fallof_sq + 1.0);
- vec3 P;
+ return max(0, NdotD - params.horizon_angle_threshold) * falloff_mult;
+}
- // We need to divide by 2^mipLevel to read the appropriately scaled coordinate from a MIP-map.
- // Manually clamp to the texture size because texelFetch bypasses the texture unit
- ivec2 mipP = clamp(ssP >> mipLevel, ivec2(0), (params.screen_size >> mipLevel) - ivec2(1));
+void SSAO_tap_inner(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) {
+ // get depth at sample
+ float viewspace_sample_z = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x;
-#ifdef USE_HALF_SIZE
- P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel).r;
- P.z = -P.z;
-#else
- if (mipLevel < 1) {
- //read from depth buffer
- P.z = texelFetch(source_depth, mipP, 0).r;
- P.z = P.z * 2.0 - 1.0;
- if (params.orthogonal) {
- P.z = ((P.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
- } else {
- P.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - P.z * (params.z_far - params.z_near));
- }
- P.z = -P.z;
+ // convert to viewspace
+ vec3 hit_pos = NDC_to_view_space(p_sampling_uv.xy, viewspace_sample_z).xyz;
+ vec3 hit_delta = hit_pos - p_pix_center_pos;
- } else {
- //read from mipmaps
- P.z = texelFetch(source_depth_mipmaps, mipP, mipLevel - 1).r;
- P.z = -P.z;
+ float obscurance = calculate_pixel_obscurance(p_pixel_normal, hit_delta, p_fallof_sq);
+ float weight = 1.0;
+
+ if (p_quality_level >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) {
+ float reduct = max(0, -hit_delta.z);
+ reduct = clamp(reduct * params.neg_inv_radius + 2.0, 0.0, 1.0);
+ weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT);
+ }
+ weight *= p_weight_mod;
+ r_obscurance_sum += obscurance * weight;
+ r_weight_sum += weight;
+}
+
+void SSAOTap(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const int p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) {
+ vec2 sample_offset;
+ float sample_pow_2_len;
+
+ // patterns
+ {
+ vec4 new_sample = sample_pattern[p_tap_index];
+ sample_offset = new_sample.xy * p_rot_scale;
+ sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) );
+ p_weight_mod *= new_sample.z;
}
-#endif
- // Offset to pixel center
- P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z);
+ // snap to pixel center (more correct obscurance math, avoids artifacts)
+ sample_offset = round(sample_offset);
- return P;
+ // calculate MIP based on the sample distance from the centre, similar to as described
+ // in http://graphics.cs.williams.edu/papers/SAOHPG12/.
+ float mip_level = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (sample_pow_2_len + p_mip_offset);
+
+ vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos;
+
+ SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod);
+
+ // for the second tap, just use the mirrored offset
+ vec2 sample_offset_mirrored_uv = -sample_offset;
+
+ // tilt the second set of samples so that the disk is effectively rotated by the normal
+ // effective at removing one set of artifacts, but too expensive for lower quality settings
+ if (p_quality_level >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) {
+ float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy);
+ sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy;
+ sample_offset_mirrored_uv = round(sample_offset_mirrored_uv);
+ }
+
+ // snap to pixel center (more correct obscurance math, avoids artifacts)
+ vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos;
+
+ SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod);
}
-/** Compute the occlusion due to sample with index \a i about the pixel at \a ssC that corresponds
- to camera-space point \a C with unit normal \a n_C, using maximum screen-space sampling radius \a ssDiskRadius
+void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) {
+ vec2 pos_rounded = trunc(p_pos);
+ uvec2 upos = uvec2(pos_rounded);
+
+ const int number_of_taps = (p_adaptive_base) ? (SSAO_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]);
+ float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z;
+
+ vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass));
+ vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass));
+
+ // get this pixel's viewspace depth
+ pix_z = valuesUL.y;
+
+ // get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0)
+ pix_left_z = valuesUL.x;
+ pix_top_z = valuesUL.z;
+ pix_right_z = valuesBR.z;
+ pix_bottom_z = valuesBR.x;
+
+ vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025;
+ vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z);
- Note that units of H() in the HPG12 paper are meters, not
- unitless. The whole falloff/sampling function is therefore
- unitless. In this implementation, we factor out (9 / radius).
+ // Load this pixel's viewspace normal
+ uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy;
+ vec3 pixel_normal = load_normal(ivec2(full_res_coord));
- Four versions of the falloff function are implemented below
-*/
-float sampleAO(in ivec2 ssC, in vec3 C, in vec3 n_C, in float ssDiskRadius, in float p_radius, in int tapIndex, in float randomPatternRotationAngle) {
- // Offset on the unit disk, spun for this pixel
- float ssR;
- vec2 unitOffset = tapLocation(tapIndex, randomPatternRotationAngle, ssR);
- ssR *= ssDiskRadius;
+ const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy;
- ivec2 ssP = ivec2(ssR * unitOffset) + ssC;
+ float pixel_lookup_radius;
+ float fallof_sq;
- if (any(lessThan(ssP, ivec2(0))) || any(greaterThanEqual(ssP, params.screen_size))) {
- return 0.0;
+ // calculate effect radius and fit our screen sampling pattern inside it
+ float viewspace_radius;
+ calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq);
+
+ // calculate samples rotation/scaling
+ mat2 rot_scale_matrix;
+ uint pseudo_random_index;
+
+ {
+ vec4 rotation_scale;
+ // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead
+ if (!p_adaptive_base && (p_quality_level >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) {
+ float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - normalized_screen_pos.y));
+ near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0);
+ pixel_lookup_radius *= near_screen_border;
+ }
+
+ // load & update pseudo-random rotation matrix
+ pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5;
+ rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index];
+ rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius);
}
- // The occluding point in camera space
- vec3 Q = getOffsetPosition(ssP, ssR);
+ // the main obscurance & sample weight storage
+ float obscurance_sum = 0.0;
+ float weight_sum = 0.0;
- vec3 v = Q - C;
+ // edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge)
+ vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0);
- float vv = dot(v, v);
- float vn = dot(v, n_C);
+ // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats
+ pix_center_pos *= 0.9992;
- const float epsilon = 0.01;
- float radius2 = p_radius * p_radius;
+ if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+ edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z);
+ }
- // A: From the HPG12 paper
- // Note large epsilon to avoid overdarkening within cracks
- //return float(vv < radius2) * max((vn - bias) / (epsilon + vv), 0.0) * radius2 * 0.6;
+ // adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection)
+ if (!p_adaptive_base && (p_quality_level >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET)) {
+ // disable in case of quality level 4 (reference)
+ if (p_quality_level != 4) {
+ //approximate neighbouring pixels positions (actually just deltas or "positions - pix_center_pos" )
+ vec3 normalized_viewspace_dir = vec3(pix_center_pos.xy / pix_center_pos.zz, 1.0);
+ vec3 pixel_left_delta = vec3(-pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_left_z - pix_center_pos.z);
+ vec3 pixel_right_delta = vec3(+pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_right_z - pix_center_pos.z);
+ vec3 pixel_top_delta = vec3(0.0, -pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_top_z - pix_center_pos.z);
+ vec3 pixel_bottom_delta = vec3(0.0, +pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_bottom_z - pix_center_pos.z);
+
+ const float range_reduction = 4.0f; // this is to avoid various artifacts
+ const float modified_fallof_sq = range_reduction * fallof_sq;
+
+ vec4 additional_obscurance;
+ additional_obscurance.x = calculate_pixel_obscurance(pixel_normal, pixel_left_delta, modified_fallof_sq);
+ additional_obscurance.y = calculate_pixel_obscurance(pixel_normal, pixel_right_delta, modified_fallof_sq);
+ additional_obscurance.z = calculate_pixel_obscurance(pixel_normal, pixel_top_delta, modified_fallof_sq);
+ additional_obscurance.w = calculate_pixel_obscurance(pixel_normal, pixel_bottom_delta, modified_fallof_sq);
+
+ obscurance_sum += params.detail_intensity * dot(additional_obscurance, edgesLRTB);
+ }
+ }
- // B: Smoother transition to zero (lowers contrast, smoothing out corners). [Recommended]
- float f = max(radius2 - vv, 0.0);
- return f * f * f * max((vn - params.bias) / (epsilon + vv), 0.0);
+ // Sharp normals also create edges - but this adds to the cost as well
+ if (!p_adaptive_base && (p_quality_level >= SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+ vec3 neighbour_normal_left = load_normal(ivec2(full_res_coord), ivec2(-2, 0));
+ vec3 neighbour_normal_right = load_normal(ivec2(full_res_coord), ivec2(2, 0));
+ vec3 neighbour_normal_top = load_normal(ivec2(full_res_coord), ivec2(0, -2));
+ vec3 neighbour_normal_bottom = load_normal(ivec2(full_res_coord), ivec2(0, 2));
- // C: Medium contrast (which looks better at high radii), no division. Note that the
- // contribution still falls off with radius^2, but we've adjusted the rate in a way that is
- // more computationally efficient and happens to be aesthetically pleasing.
- // return 4.0 * max(1.0 - vv * invRadius2, 0.0) * max(vn - bias, 0.0);
+ const float dot_threshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD;
- // D: Low contrast, no division operation
- // return 2.0 * float(vv < radius * radius) * max(vn - bias, 0.0);
-}
+ vec4 normal_edgesLRTB;
+ normal_edgesLRTB.x = clamp((dot(pixel_normal, neighbour_normal_left) + dot_threshold), 0.0, 1.0);
+ normal_edgesLRTB.y = clamp((dot(pixel_normal, neighbour_normal_right) + dot_threshold), 0.0, 1.0);
+ normal_edgesLRTB.z = clamp((dot(pixel_normal, neighbour_normal_top) + dot_threshold), 0.0, 1.0);
+ normal_edgesLRTB.w = clamp((dot(pixel_normal, neighbour_normal_bottom) + dot_threshold), 0.0, 1.0);
-void main() {
- // Pixel being shaded
- ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
- if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing
- return;
+ edgesLRTB *= normal_edgesLRTB;
}
- // World space point being shaded
- vec3 C = getPosition(ssC);
+ const float global_mip_offset = SSAO_DEPTH_MIPS_GLOBAL_OFFSET;
+ float mip_offset = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixel_lookup_radius) + global_mip_offset);
-#ifdef USE_HALF_SIZE
- vec3 n_C = texelFetch(source_normal, ssC << 1, 0).xyz * 2.0 - 1.0;
-#else
- vec3 n_C = texelFetch(source_normal, ssC, 0).xyz * 2.0 - 1.0;
+ // Used to tilt the second set of samples so that the disk is effectively rotated by the normal
+ // effective at removing one set of artifacts, but too expensive for lower quality settings
+ vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y);
+ float norm_xy_length = length(norm_xy);
+ norm_xy /= vec2(norm_xy_length, -norm_xy_length);
+ norm_xy_length *= SSAO_TILT_SAMPLES_AMOUNT;
+
+ // standard, non-adaptive approach
+ if ((p_quality_level != 3) || p_adaptive_base) {
+ for (int i = 0; i < number_of_taps; i++) {
+ SSAOTap(p_quality_level, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length);
+ }
+ }
+#ifdef ADAPTIVE
+ else {
+ // add new ones if needed
+ vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy;
+ float importance = textureLod(source_importance, full_res_uv, 0.0).x;
+
+ // this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixel regardless of importance
+ obscurance_sum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / float(SSAO_MAX_TAPS)) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / float(SSAO_MAX_TAPS));
+
+ // load existing base values
+ vec2 base_values = imageLoad(source_ssao, ivec3(upos, params.pass)).xy;
+ weight_sum += base_values.y * float(SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0);
+ obscurance_sum += (base_values.x) * weight_sum;
+
+ // increase importance around edges
+ float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0));
+
+ float avg_total_importance = float(counter.sum) * params.load_counter_avg_div;
+
+ float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0);
+ importance *= importance_limiter;
+
+ float additional_sample_count = SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance;
+
+ const float blend_range = 3.0;
+ const float blend_range_inv = 1.0 / blend_range;
+
+ additional_sample_count += 0.5;
+ uint additional_samples = uint(additional_sample_count);
+ uint additional_samples_to = min(SSAO_MAX_TAPS, additional_samples + SSAO_ADAPTIVE_TAP_BASE_COUNT);
+
+ for (uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) {
+ additional_sample_count -= 1.0f;
+ float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0);
+ SSAOTap(p_quality_level, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length);
+ }
+ }
#endif
- n_C = normalize(n_C);
- n_C.y = -n_C.y; //because this code reads flipped
- // Hash function used in the HPG12 AlchemyAO paper
- float randomPatternRotationAngle = mod(float((3 * ssC.x ^ ssC.y + ssC.x * ssC.y) * 10), TWO_PI);
+ // early out for adaptive base - just output weight (used for the next pass)
+ if (p_adaptive_base) {
+ float obscurance = obscurance_sum / weight_sum;
+
+ r_shadow_term = obscurance;
+ r_edges = vec4(0.0);
+ r_weight = weight_sum;
+ return;
+ }
+
+ // calculate weighted average
+ float obscurance = obscurance_sum / weight_sum;
- // Reconstruct normals from positions. These will lead to 1-pixel black lines
- // at depth discontinuities, however the blur will wipe those out so they are not visible
- // in the final image.
+ // calculate fadeout (1 close, gradient, 0 far)
+ float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0);
- // Choose the screen-space sample radius
- // proportional to the projected area of the sphere
+ // Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one)
+ if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+ // when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts
+ float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0);
- float ssDiskRadius = -params.proj_scale * params.radius;
- if (!params.orthogonal) {
- ssDiskRadius = -params.proj_scale * params.radius / C.z;
+ fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0);
}
- float sum = 0.0;
- for (int i = 0; i < NUM_SAMPLES; ++i) {
- sum += sampleAO(ssC, C, n_C, ssDiskRadius, params.radius, i, randomPatternRotationAngle);
+
+ // strength
+ obscurance = params.intensity * obscurance;
+
+ // clamp
+ obscurance = min(obscurance, params.shadow_clamp);
+
+ // fadeout
+ obscurance *= fade_out;
+
+ // conceptually switch to occlusion with the meaning being visibility (grows with visibility, occlusion == 1 implies full visibility),
+ // to be in line with what is more commonly used.
+ float occlusion = 1.0 - obscurance;
+
+ // modify the gradient
+ // note: this cannot be moved to a later pass because of loss of precision after storing in the render target
+ occlusion = pow(clamp(occlusion, 0.0, 1.0), params.shadow_power);
+
+ // outputs!
+ r_shadow_term = occlusion; // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit)
+ r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc.
+ r_weight = weight_sum;
+}
+
+void main() {
+ float out_shadow_term;
+ float out_weight;
+ vec4 out_edges;
+ ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+ if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing
+ return;
}
- float A = max(0.0, 1.0 - sum * params.intensity_div_r6 * (5.0 / float(NUM_SAMPLES)));
+ vec2 uv = vec2(gl_GlobalInvocationID) + vec2(0.5);
+#ifdef SSAO_BASE
+ generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, true);
- imageStore(dest_image, ssC, vec4(A));
+ imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, out_weight / (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0), 0.0, 0.0));
+#else
+ generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, false); // pass in quality levels
+ if (params.quality == 0) {
+ out_edges = vec4(1.0);
+ }
+
+ imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, pack_edges(out_edges), 0.0, 0.0));
+#endif
}
diff --git a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl
index 3e63e3cb59..510a777048 100644
--- a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl
+++ b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl
@@ -1,3 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
#[compute]
#version 450
@@ -7,147 +26,129 @@ VERSION_DEFINES
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform sampler2D source_ssao;
-layout(set = 1, binding = 0) uniform sampler2D source_depth;
-#ifdef MODE_UPSCALE
-layout(set = 2, binding = 0) uniform sampler2D source_depth_mipmaps;
-#endif
-layout(r8, set = 3, binding = 0) uniform restrict writeonly image2D dest_image;
-
-//////////////////////////////////////////////////////////////////////////////////////////////
-// Tunable Parameters:
+layout(rg8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image;
layout(push_constant, binding = 1, std430) uniform Params {
- float edge_sharpness; /** Increase to make depth edges crisper. Decrease to reduce flicker. */
- int filter_scale;
- float z_far;
- float z_near;
- bool orthogonal;
- uint pad0;
- uint pad1;
- uint pad2;
- ivec2 axis; /** (1, 0) or (0, 1) */
- ivec2 screen_size;
+ float edge_sharpness;
+ float pad;
+ vec2 half_screen_pixel_size;
}
params;
-/** Filter radius in pixels. This will be multiplied by SCALE. */
-#define R (4)
+vec4 unpack_edges(float p_packed_val) {
+ uint packed_val = uint(p_packed_val * 255.5);
+ vec4 edgesLRTB;
+ edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0;
+ edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0;
+ edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0;
+ edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0;
+
+ return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0);
+}
+
+void add_sample(float p_ssao_value, float p_edge_value, inout float r_sum, inout float r_sum_weight) {
+ float weight = p_edge_value;
+
+ r_sum += (weight * p_ssao_value);
+ r_sum_weight += weight;
+}
+
+#ifdef MODE_WIDE
+vec2 sample_blurred_wide(vec2 p_coord) {
+ vec2 vC = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 0)).xy;
+ vec2 vL = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(-2, 0)).xy;
+ vec2 vT = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, -2)).xy;
+ vec2 vR = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(2, 0)).xy;
+ vec2 vB = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 2)).xy;
+
+ float packed_edges = vC.y;
+ vec4 edgesLRTB = unpack_edges(packed_edges);
+ edgesLRTB.x *= unpack_edges(vL.y).y;
+ edgesLRTB.z *= unpack_edges(vT.y).w;
+ edgesLRTB.y *= unpack_edges(vR.y).x;
+ edgesLRTB.w *= unpack_edges(vB.y).z;
+
+ float ssao_value = vC.x;
+ float ssao_valueL = vL.x;
+ float ssao_valueT = vT.x;
+ float ssao_valueR = vR.x;
+ float ssao_valueB = vB.x;
+
+ float sum_weight = 0.8f;
+ float sum = ssao_value * sum_weight;
+
+ add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight);
+ add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight);
+ add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight);
+ add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight);
+
+ float ssao_avg = sum / sum_weight;
+
+ ssao_value = ssao_avg;
+
+ return vec2(ssao_value, packed_edges);
+}
+#endif
+
+#ifdef MODE_SMART
+vec2 sample_blurred(vec3 p_pos, vec2 p_coord) {
+ float packed_edges = texelFetch(source_ssao, ivec2(p_pos.xy), 0).y;
+ vec4 edgesLRTB = unpack_edges(packed_edges);
+
+ vec4 valuesUL = textureGather(source_ssao, vec2(p_coord - params.half_screen_pixel_size * 0.5));
+ vec4 valuesBR = textureGather(source_ssao, vec2(p_coord + params.half_screen_pixel_size * 0.5));
+
+ float ssao_value = valuesUL.y;
+ float ssao_valueL = valuesUL.x;
+ float ssao_valueT = valuesUL.z;
+ float ssao_valueR = valuesBR.z;
+ float ssao_valueB = valuesBR.x;
+
+ float sum_weight = 0.5;
+ float sum = ssao_value * sum_weight;
+
+ add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight);
+ add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight);
+
+ add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight);
+ add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight);
-//////////////////////////////////////////////////////////////////////////////////////////////
+ float ssao_avg = sum / sum_weight;
-// Gaussian coefficients
-const float gaussian[R + 1] =
- //float[](0.356642, 0.239400, 0.072410, 0.009869);
- //float[](0.398943, 0.241971, 0.053991, 0.004432, 0.000134); // stddev = 1.0
- float[](0.153170, 0.144893, 0.122649, 0.092902, 0.062970); // stddev = 2.0
-//float[](0.111220, 0.107798, 0.098151, 0.083953, 0.067458, 0.050920, 0.036108); // stddev = 3.0
+ ssao_value = ssao_avg;
+
+ return vec2(ssao_value, packed_edges);
+}
+#endif
void main() {
// Pixel being shaded
ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
- if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing
- return;
- }
-
-#ifdef MODE_UPSCALE
-
- //closest one should be the same pixel, but check nearby just in case
- float depth = texelFetch(source_depth, ssC, 0).r;
-
- depth = depth * 2.0 - 1.0;
- if (params.orthogonal) {
- depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
- } else {
- depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near));
- }
-
- vec2 pixel_size = 1.0 / vec2(params.screen_size);
- vec2 closest_uv = vec2(ssC) * pixel_size + pixel_size * 0.5;
- vec2 from_uv = closest_uv;
- vec2 ps2 = pixel_size; // * 2.0;
-
- float closest_depth = abs(textureLod(source_depth_mipmaps, closest_uv, 0.0).r - depth);
-
- vec2 offsets[4] = vec2[](vec2(ps2.x, 0), vec2(-ps2.x, 0), vec2(0, ps2.y), vec2(0, -ps2.y));
- for (int i = 0; i < 4; i++) {
- vec2 neighbour = from_uv + offsets[i];
- float neighbour_depth = abs(textureLod(source_depth_mipmaps, neighbour, 0.0).r - depth);
- if (neighbour_depth < closest_depth) {
- closest_uv = neighbour;
- closest_depth = neighbour_depth;
- }
- }
-
- float visibility = textureLod(source_ssao, closest_uv, 0.0).r;
- imageStore(dest_image, ssC, vec4(visibility));
-#else
- float depth = texelFetch(source_depth, ssC, 0).r;
+#ifdef MODE_NON_SMART
-#ifdef MODE_FULL_SIZE
- depth = depth * 2.0 - 1.0;
+ vec2 halfPixel = params.half_screen_pixel_size * 0.5f;
- if (params.orthogonal) {
- depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
- } else {
- depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near));
- }
+ vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size;
-#endif
- float depth_divide = 1.0 / params.z_far;
-
- //depth *= depth_divide;
-
- /*
- if (depth > params.z_far * 0.999) {
- discard; //skybox
- }
- */
-
- float sum = texelFetch(source_ssao, ssC, 0).r;
-
- // Base weight for depth falloff. Increase this for more blurriness,
- // decrease it for better edge discrimination
- float BASE = gaussian[0];
- float totalWeight = BASE;
- sum *= totalWeight;
-
- ivec2 clamp_limit = params.screen_size - ivec2(1);
-
- for (int r = -R; r <= R; ++r) {
- // We already handled the zero case above. This loop should be unrolled and the static branch optimized out,
- // so the IF statement has no runtime cost
- if (r != 0) {
- ivec2 ppos = ssC + params.axis * (r * params.filter_scale);
- float value = texelFetch(source_ssao, clamp(ppos, ivec2(0), clamp_limit), 0).r;
- ivec2 rpos = clamp(ppos, ivec2(0), clamp_limit);
-
- float temp_depth = texelFetch(source_depth, rpos, 0).r;
-#ifdef MODE_FULL_SIZE
- temp_depth = temp_depth * 2.0 - 1.0;
- if (params.orthogonal) {
- temp_depth = ((temp_depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
- } else {
- temp_depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - temp_depth * (params.z_far - params.z_near));
- }
- //temp_depth *= depth_divide;
-#endif
- // spatial domain: offset gaussian tap
- float weight = 0.3 + gaussian[abs(r)];
- //weight *= max(0.0, dot(temp_normal, normal));
+ vec2 centre = textureLod(source_ssao, vec2(uv), 0.0).xy;
- // range domain (the "bilateral" weight). As depth difference increases, decrease weight.
- weight *= max(0.0, 1.0 - params.edge_sharpness * abs(temp_depth - depth));
+ vec4 vals;
+ vals.x = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x * 3, -halfPixel.y)), 0.0).x;
+ vals.y = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x, -halfPixel.y * 3)), 0.0).x;
+ vals.z = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x, +halfPixel.y * 3)), 0.0).x;
+ vals.w = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x * 3, +halfPixel.y)), 0.0).x;
- sum += value * weight;
- totalWeight += weight;
- }
- }
+ vec2 sampled = vec2(dot(vals, vec4(0.2)) + centre.x * 0.2, centre.y);
- const float epsilon = 0.0001;
- float visibility = sum / (totalWeight + epsilon);
+#else
+#ifdef MODE_SMART
+ vec2 sampled = sample_blurred(vec3(gl_GlobalInvocationID), (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size);
+#else // MODE_WIDE
+ vec2 sampled = sample_blurred_wide((vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size);
+#endif
- imageStore(dest_image, ssC, vec4(visibility));
#endif
+ imageStore(dest_image, ivec2(ssC), vec4(sampled, 0.0, 0.0));
}
diff --git a/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl
new file mode 100644
index 0000000000..cb2d31f70d
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl
@@ -0,0 +1,206 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(push_constant, binding = 1, std430) uniform Params {
+ vec2 pixel_size;
+ float z_far;
+ float z_near;
+ bool orthogonal;
+ float radius_sq;
+ uvec2 pad;
+}
+params;
+
+layout(set = 0, binding = 0) uniform sampler2D source_depth;
+
+layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_image0; //rename
+#ifdef GENERATE_MIPS
+layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray dest_image1;
+layout(r16f, set = 2, binding = 1) uniform restrict writeonly image2DArray dest_image2;
+layout(r16f, set = 2, binding = 2) uniform restrict writeonly image2DArray dest_image3;
+#endif
+
+vec4 screen_space_to_view_space_depth(vec4 p_depth) {
+ if (params.orthogonal) {
+ vec4 depth = p_depth * 2.0 - 1.0;
+ return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
+ }
+
+ float depth_linearize_mul = params.z_near;
+ float depth_linearize_add = params.z_far;
+
+ // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
+
+ // Set your depth_linearize_mul and depth_linearize_add to:
+ // depth_linearize_mul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear );
+ // depth_linearize_add = cameraClipFar / ( cameraClipFar - cameraClipNear );
+
+ return depth_linearize_mul / (depth_linearize_add - p_depth);
+}
+
+float screen_space_to_view_space_depth(float p_depth) {
+ if (params.orthogonal) {
+ float depth = p_depth * 2.0 - 1.0;
+ return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / (2.0 * params.z_far);
+ }
+
+ float depth_linearize_mul = params.z_near;
+ float depth_linearize_add = params.z_far;
+
+ return depth_linearize_mul / (depth_linearize_add - p_depth);
+}
+
+#ifdef GENERATE_MIPS
+
+shared float depth_buffer[4][8][8];
+
+float mip_smart_average(vec4 p_depths) {
+ float closest = min(min(p_depths.x, p_depths.y), min(p_depths.z, p_depths.w));
+ float fallof_sq = -1.0f / params.radius_sq;
+ vec4 dists = p_depths - closest.xxxx;
+ vec4 weights = clamp(dists * dists * fallof_sq + 1.0, 0.0, 1.0);
+ return dot(weights, p_depths) / dot(weights, vec4(1.0, 1.0, 1.0, 1.0));
+}
+
+void prepare_depths_and_mips(vec4 p_samples, uvec2 p_output_coord, uvec2 p_gtid) {
+ p_samples = screen_space_to_view_space_depth(p_samples);
+
+ depth_buffer[0][p_gtid.x][p_gtid.y] = p_samples.w;
+ depth_buffer[1][p_gtid.x][p_gtid.y] = p_samples.z;
+ depth_buffer[2][p_gtid.x][p_gtid.y] = p_samples.x;
+ depth_buffer[3][p_gtid.x][p_gtid.y] = p_samples.y;
+
+ imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 0), vec4(p_samples.w));
+ imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 1), vec4(p_samples.z));
+ imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 2), vec4(p_samples.x));
+ imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 3), vec4(p_samples.y));
+
+ uint depth_array_index = 2 * (p_gtid.y % 2) + (p_gtid.x % 2);
+ uvec2 depth_array_offset = ivec2(p_gtid.x % 2, p_gtid.y % 2);
+ ivec2 buffer_coord = ivec2(p_gtid) - ivec2(depth_array_offset);
+
+ p_output_coord /= 2;
+ groupMemoryBarrier();
+ barrier();
+
+ // if (still_alive) <-- all threads alive here
+ {
+ float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0];
+ float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 1];
+ float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 0];
+ float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 1];
+
+ float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11));
+ imageStore(dest_image1, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg));
+ depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg;
+ }
+
+ bool still_alive = p_gtid.x % 4 == depth_array_offset.x && p_gtid.y % 4 == depth_array_offset.y;
+
+ p_output_coord /= 2;
+ groupMemoryBarrier();
+ barrier();
+
+ if (still_alive) {
+ float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0];
+ float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 2];
+ float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 0];
+ float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 2];
+
+ float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11));
+ imageStore(dest_image2, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg));
+ depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg;
+ }
+
+ still_alive = p_gtid.x % 8 == depth_array_offset.x && depth_array_offset.y % 8 == depth_array_offset.y;
+
+ p_output_coord /= 2;
+ groupMemoryBarrier();
+ barrier();
+
+ if (still_alive) {
+ float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0];
+ float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 4];
+ float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 0];
+ float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 4];
+
+ float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11));
+ imageStore(dest_image3, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg));
+ }
+}
+#else
+#ifndef USE_HALF_BUFFERS
+void prepare_depths(vec4 p_samples, uvec2 p_tid) {
+ p_samples = screen_space_to_view_space_depth(p_samples);
+
+ imageStore(dest_image0, ivec3(p_tid, 0), vec4(p_samples.w));
+ imageStore(dest_image0, ivec3(p_tid, 1), vec4(p_samples.z));
+ imageStore(dest_image0, ivec3(p_tid, 2), vec4(p_samples.x));
+ imageStore(dest_image0, ivec3(p_tid, 3), vec4(p_samples.y));
+}
+#endif
+#endif
+
+void main() {
+#ifdef USE_HALF_BUFFERS
+#ifdef USE_HALF_SIZE
+ float sample_00 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 0, 4 * gl_GlobalInvocationID.y + 0), 0).x;
+ float sample_11 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 2, 4 * gl_GlobalInvocationID.y + 2), 0).x;
+#else
+ float sample_00 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 0, 2 * gl_GlobalInvocationID.y + 0), 0).x;
+ float sample_11 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 1, 2 * gl_GlobalInvocationID.y + 1), 0).x;
+#endif
+ sample_00 = screen_space_to_view_space_depth(sample_00);
+ sample_11 = screen_space_to_view_space_depth(sample_11);
+
+ imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 0), vec4(sample_00));
+ imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 3), vec4(sample_11));
+#else //!USE_HALF_BUFFERS
+#ifdef USE_HALF_SIZE
+ ivec2 depth_buffer_coord = 4 * ivec2(gl_GlobalInvocationID.xy);
+ ivec2 output_coord = ivec2(gl_GlobalInvocationID);
+
+ vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size;
+ vec4 samples;
+ samples.x = textureLodOffset(source_depth, uv, 0, ivec2(0, 2)).x;
+ samples.y = textureLodOffset(source_depth, uv, 0, ivec2(2, 2)).x;
+ samples.z = textureLodOffset(source_depth, uv, 0, ivec2(2, 0)).x;
+ samples.w = textureLodOffset(source_depth, uv, 0, ivec2(0, 0)).x;
+#else
+ ivec2 depth_buffer_coord = 2 * ivec2(gl_GlobalInvocationID.xy);
+ ivec2 output_coord = ivec2(gl_GlobalInvocationID);
+
+ vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size;
+ vec4 samples = textureGather(source_depth, uv);
+#endif
+#ifdef GENERATE_MIPS
+ prepare_depths_and_mips(samples, output_coord, gl_LocalInvocationID.xy);
+#else
+ prepare_depths(samples, gl_GlobalInvocationID.xy);
+#endif
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl
new file mode 100644
index 0000000000..6aa7624261
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl
@@ -0,0 +1,126 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+#ifdef GENERATE_MAP
+layout(set = 0, binding = 0) uniform sampler2DArray source_ssao;
+#else
+layout(set = 0, binding = 0) uniform sampler2D source_importance;
+#endif
+layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image;
+
+#ifdef PROCESS_MAPB
+layout(set = 2, binding = 0, std430) buffer Counter {
+ uint sum;
+}
+counter;
+#endif
+
+layout(push_constant, binding = 1, std430) uniform Params {
+ vec2 half_screen_pixel_size;
+ float intensity;
+ float power;
+}
+params;
+
+void main() {
+ // Pixel being shaded
+ ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+
+#ifdef GENERATE_MAP
+ // importance map stuff
+ uvec2 base_position = ssC * 2;
+
+ vec2 base_uv = (vec2(base_position) + vec2(0.5f, 0.5f)) * params.half_screen_pixel_size;
+
+ float avg = 0.0;
+ float minV = 1.0;
+ float maxV = 0.0;
+ for (int i = 0; i < 4; i++) {
+ vec4 vals = textureGather(source_ssao, vec3(base_uv, i));
+
+ // apply the same modifications that would have been applied in the main shader
+ vals = params.intensity * vals;
+
+ vals = 1 - vals;
+
+ vals = pow(clamp(vals, 0.0, 1.0), vec4(params.power));
+
+ avg += dot(vec4(vals.x, vals.y, vals.z, vals.w), vec4(1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0));
+
+ maxV = max(maxV, max(max(vals.x, vals.y), max(vals.z, vals.w)));
+ minV = min(minV, min(min(vals.x, vals.y), min(vals.z, vals.w)));
+ }
+
+ float min_max_diff = maxV - minV;
+
+ imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.8)));
+#endif
+
+#ifdef PROCESS_MAPA
+ vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0;
+
+ float centre = textureLod(source_importance, uv, 0.0).x;
+
+ vec2 half_pixel = params.half_screen_pixel_size;
+
+ vec4 vals;
+ vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, -half_pixel.y), 0.0).x;
+ vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3), 0.0).x;
+ vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, +half_pixel.y), 0.0).x;
+ vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3), 0.0).x;
+
+ float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25));
+
+ imageStore(dest_image, ssC, vec4(avg));
+#endif
+
+#ifdef PROCESS_MAPB
+ vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0;
+
+ float centre = textureLod(source_importance, uv, 0.0).x;
+
+ vec2 half_pixel = params.half_screen_pixel_size;
+
+ vec4 vals;
+ vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3), 0.0).x;
+ vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, -half_pixel.y), 0.0).x;
+ vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3), 0.0).x;
+ vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, +half_pixel.y), 0.0).x;
+
+ float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25));
+
+ imageStore(dest_image, ssC, vec4(avg));
+
+ // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel
+ uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5);
+
+ // save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9
+ if (((ssC.x % 3) + (ssC.y % 3)) == 0) {
+ atomicAdd(counter.sum, sum);
+ }
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl
new file mode 100644
index 0000000000..4fdf334aa5
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl
@@ -0,0 +1,119 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(rgba8, set = 0, binding = 0) uniform restrict writeonly image2D dest_image;
+layout(set = 1, binding = 0) uniform sampler2DArray source_texture;
+
+layout(push_constant, binding = 1, std430) uniform Params {
+ float inv_sharpness;
+ uint size_modifier;
+ vec2 pixel_size;
+}
+params;
+
+vec4 unpack_edges(float p_packed_val) {
+ uint packed_val = uint(p_packed_val * 255.5);
+ vec4 edgesLRTB;
+ edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0;
+ edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0;
+ edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0;
+ edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0;
+
+ return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0);
+}
+
+void main() {
+ ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+ if (any(greaterThanEqual(ssC, ivec2(1.0 / params.pixel_size)))) { //too large, do nothing
+ return;
+ }
+
+#ifdef MODE_SMART
+ float ao;
+ uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy);
+ vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size;
+
+ // calculate index in the four deinterleaved source array texture
+ int mx = int(pix_pos.x % 2);
+ int my = int(pix_pos.y % 2);
+ int index_center = mx + my * 2; // center index
+ int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal
+ int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical
+ int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal
+
+ vec2 center_val = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0).xy;
+
+ ao = center_val.x;
+
+ vec4 edgesLRTB = unpack_edges(center_val.y);
+
+ // convert index shifts to sampling offsets
+ float fmx = float(mx);
+ float fmy = float(my);
+
+ // in case of an edge, push sampling offsets away from the edge (towards pixel center)
+ float fmxe = (edgesLRTB.y - edgesLRTB.x);
+ float fmye = (edgesLRTB.w - edgesLRTB.z);
+
+ // calculate final sampling offsets and sample using bilinear filter
+ vec2 uv_horizontal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size;
+ float ao_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0).x;
+ vec2 uv_vertical = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size;
+ float ao_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0).x;
+ vec2 uv_diagonal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size;
+ float ao_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0).x;
+
+ // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0
+ vec4 blendWeights;
+ blendWeights.x = 1.0;
+ blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5;
+ blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5;
+ blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5;
+
+ // calculate weighted average
+ float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0));
+ ao = dot(vec4(ao, ao_horizontal, ao_vertical, ao_diagonal), blendWeights) / blendWeightsSum;
+
+ imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(ao));
+#else // !MODE_SMART
+
+ vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size;
+#ifdef MODE_HALF
+ float a = textureLod(source_texture, vec3(uv, 0), 0.0).x;
+ float d = textureLod(source_texture, vec3(uv, 3), 0.0).x;
+ float avg = (a + d) * 0.5;
+
+#else
+ float a = textureLod(source_texture, vec3(uv, 0), 0.0).x;
+ float b = textureLod(source_texture, vec3(uv, 1), 0.0).x;
+ float c = textureLod(source_texture, vec3(uv, 2), 0.0).x;
+ float d = textureLod(source_texture, vec3(uv, 3), 0.0).x;
+ float avg = (a + b + c + d) * 0.25;
+
+#endif
+ imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(avg));
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/ssao_minify.glsl b/servers/rendering/renderer_rd/shaders/ssao_minify.glsl
deleted file mode 100644
index 263fca386f..0000000000
--- a/servers/rendering/renderer_rd/shaders/ssao_minify.glsl
+++ /dev/null
@@ -1,45 +0,0 @@
-#[compute]
-
-#version 450
-
-VERSION_DEFINES
-
-layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
-
-layout(push_constant, binding = 1, std430) uniform Params {
- vec2 pixel_size;
- float z_far;
- float z_near;
- ivec2 source_size;
- bool orthogonal;
- uint pad;
-}
-params;
-
-#ifdef MINIFY_START
-layout(set = 0, binding = 0) uniform sampler2D source_texture;
-#else
-layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_image;
-#endif
-layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_image;
-
-void main() {
- ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
-
- if (any(greaterThan(pos, params.source_size >> 1))) { //too large, do nothing
- return;
- }
-
-#ifdef MINIFY_START
- float depth = texelFetch(source_texture, pos << 1, 0).r * 2.0 - 1.0;
- if (params.orthogonal) {
- depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
- } else {
- depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near));
- }
-#else
- float depth = imageLoad(source_image, pos << 1).r;
-#endif
-
- imageStore(dest_image, pos, vec4(depth));
-}
diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
index 13b162f0c9..aa32809a06 100644
--- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
+++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
@@ -4,6 +4,15 @@
VERSION_DEFINES
+/* Do not use subgroups here, seems there is not much advantage and causes glitches
+#extension GL_KHR_shader_subgroup_ballot: enable
+#extension GL_KHR_shader_subgroup_arithmetic: enable
+
+#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic)
+#define USE_SUBGROUPS
+#endif
+*/
+
#if defined(MODE_FOG) || defined(MODE_FILTER)
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
@@ -23,22 +32,25 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
layout(set = 0, binding = 1) uniform texture2D shadow_atlas;
layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas;
-layout(set = 0, binding = 3, std430) restrict readonly buffer Lights {
+layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights {
LightData data[];
}
-lights;
+omni_lights;
-layout(set = 0, binding = 4, std140) uniform DirectionalLights {
+layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights {
+ LightData data[];
+}
+spot_lights;
+
+layout(set = 0, binding = 5, std140) uniform DirectionalLights {
DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS];
}
directional_lights;
-layout(set = 0, binding = 5) uniform utexture3D cluster_texture;
-
-layout(set = 0, binding = 6, std430) restrict readonly buffer ClusterData {
- uint indices[];
+layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer {
+ uint data[];
}
-cluster_data;
+cluster_buffer;
layout(set = 0, binding = 7) uniform sampler linear_sampler;
@@ -132,7 +144,7 @@ layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture;
#endif //SDFGI
-layout(push_constant, binding = 0, std430) uniform Params {
+layout(set = 0, binding = 14, std140) uniform Params {
vec2 fog_frustum_size_begin;
vec2 fog_frustum_size_end;
@@ -150,7 +162,14 @@ layout(push_constant, binding = 0, std430) uniform Params {
float detail_spread;
float gi_inject;
uint max_gi_probes;
- uint pad;
+ uint cluster_type_size;
+
+ vec2 screen_size;
+ uint cluster_shift;
+ uint cluster_width;
+
+ uvec3 cluster_pad;
+ uint max_cluster_element_count_div_32;
mat3x4 cam_rotation;
}
@@ -169,6 +188,31 @@ vec3 hash3f(uvec3 x) {
return vec3(x & 0xFFFFF) / vec3(float(0xFFFFF));
}
+float get_omni_attenuation(float distance, float inv_range, float decay) {
+ float nd = distance * inv_range;
+ nd *= nd;
+ nd *= nd; // nd^4
+ nd = max(1.0 - nd, 0.0);
+ nd *= nd; // nd^2
+ return nd * pow(max(distance, 0.0001), -decay);
+}
+
+void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) {
+ uint item_min_max = cluster_buffer.data[p_offset];
+ item_min = item_min_max & 0xFFFF;
+ item_max = item_min_max >> 16;
+ ;
+
+ item_from = item_min >> 5;
+ item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements
+}
+
+uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) {
+ int local_min = clamp(int(z_min) - int(i) * 32, 0, 31);
+ int mask_width = min(int(z_max) - int(z_min), 32 - local_min);
+ return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width);
+}
+
void main() {
vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size);
@@ -184,6 +228,12 @@ void main() {
//posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0;
vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels
+
+ uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size);
+ uvec2 cluster_pos = screen_pos >> params.cluster_shift;
+ uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32);
+ //positions in screen are too spread apart, no hopes for optimizing with subgroups
+
fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread);
vec3 view_pos;
@@ -191,6 +241,8 @@ void main() {
view_pos.z = -params.fog_frustum_end * fog_unit_pos.z;
view_pos.y = -view_pos.y;
+ uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0));
+
vec3 total_light = params.light_color;
float total_density = params.base_density;
@@ -257,108 +309,160 @@ void main() {
//compute lights from cluster
- vec3 cluster_pos;
- cluster_pos.xy = fog_unit_pos.xy;
- cluster_pos.z = clamp((abs(view_pos.z) - params.z_near) / (params.z_far - params.z_near), 0.0, 1.0);
+ { //omni lights
- uvec4 cluster_cell = texture(usampler3D(cluster_texture, linear_sampler), cluster_pos);
+ uint cluster_omni_offset = cluster_offset;
- uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT;
- uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK;
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
- for (uint i = 0; i < omni_light_count; i++) {
- uint light_index = cluster_data.indices[omni_light_pointer + i];
+ cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
- vec3 light_pos = lights.data[i].position;
- float d = distance(lights.data[i].position, view_pos) * lights.data[i].inv_radius;
- vec3 shadow_attenuation = vec3(1.0);
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
- if (d < 1.0) {
- vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy);
- vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular);
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_omni_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
+
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
+ uint light_index = 32 * i + bit;
- float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x);
+ //if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) {
+ // continue; //not masked
+ //}
- vec3 light = attenuation_energy.y * color_specular.rgb / M_PI;
+ vec3 light_pos = omni_lights.data[light_index].position;
+ float d = distance(omni_lights.data[light_index].position, view_pos);
+ float shadow_attenuation = 1.0;
- vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled);
+ if (d * omni_lights.data[light_index].inv_radius < 1.0) {
+ float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation);
- if (shadow_color_enabled.a > 0.5) {
- //has shadow
- vec4 v = vec4(view_pos, 1.0);
+ vec3 light = omni_lights.data[light_index].color / M_PI;
- vec4 splane = (lights.data[i].shadow_matrix * v);
- float shadow_len = length(splane.xyz); //need to remember shadow len from here
+ if (omni_lights.data[light_index].shadow_enabled) {
+ //has shadow
+ vec4 v = vec4(view_pos, 1.0);
- splane.xyz = normalize(splane.xyz);
- vec4 clamp_rect = lights.data[i].atlas_rect;
+ vec4 splane = (omni_lights.data[light_index].shadow_matrix * v);
+ float shadow_len = length(splane.xyz); //need to remember shadow len from here
- if (splane.z >= 0.0) {
- splane.z += 1.0;
+ splane.xyz = normalize(splane.xyz);
+ vec4 clamp_rect = omni_lights.data[light_index].atlas_rect;
- clamp_rect.y += clamp_rect.w;
+ if (splane.z >= 0.0) {
+ splane.z += 1.0;
- } else {
- splane.z = 1.0 - splane.z;
- }
+ clamp_rect.y += clamp_rect.w;
+
+ } else {
+ splane.z = 1.0 - splane.z;
+ }
- splane.xy /= splane.z;
+ splane.xy /= splane.z;
- splane.xy = splane.xy * 0.5 + 0.5;
- splane.z = shadow_len * lights.data[i].inv_radius;
- splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
- splane.w = 1.0; //needed? i think it should be 1 already
+ splane.xy = splane.xy * 0.5 + 0.5;
+ splane.z = shadow_len * omni_lights.data[light_index].inv_radius;
+ splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
+ splane.w = 1.0; //needed? i think it should be 1 already
- float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
- float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
+ float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
- shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow);
+ shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade);
+ }
+ total_light += light * attenuation * shadow_attenuation;
+ }
}
- total_light += light * attenuation * shadow_attenuation;
}
}
- uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT;
- uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK;
+ { //spot lights
- for (uint i = 0; i < spot_light_count; i++) {
- uint light_index = cluster_data.indices[spot_light_pointer + i];
+ uint cluster_spot_offset = cluster_offset + params.cluster_type_size;
- vec3 light_pos = lights.data[i].position;
- vec3 light_rel_vec = lights.data[i].position - view_pos;
- float d = length(light_rel_vec) * lights.data[i].inv_radius;
- vec3 shadow_attenuation = vec3(1.0);
+ uint item_min;
+ uint item_max;
+ uint item_from;
+ uint item_to;
+
+ cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
+
+#ifdef USE_SUBGROUPS
+ item_from = subgroupBroadcastFirst(subgroupMin(item_from));
+ item_to = subgroupBroadcastFirst(subgroupMax(item_to));
+#endif
+
+ for (uint i = item_from; i < item_to; i++) {
+ uint mask = cluster_buffer.data[cluster_spot_offset + i];
+ mask &= cluster_get_range_clip_mask(i, item_min, item_max);
+#ifdef USE_SUBGROUPS
+ uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
+#else
+ uint merged_mask = mask;
+#endif
- if (d < 1.0) {
- vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy);
- vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular);
+ while (merged_mask != 0) {
+ uint bit = findMSB(merged_mask);
+ merged_mask &= ~(1 << bit);
+#ifdef USE_SUBGROUPS
+ if (((1 << bit) & mask) == 0) { //do not process if not originally here
+ continue;
+ }
+#endif
- float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x);
+ //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) {
+ // continue; //not masked
+ //}
- vec3 spot_dir = lights.data[i].direction;
- vec2 spot_att_angle = unpackHalf2x16(lights.data[i].cone_attenuation_angle);
- float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y);
- float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y));
- attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x);
+ uint light_index = 32 * i + bit;
- vec3 light = attenuation_energy.y * color_specular.rgb / M_PI;
+ vec3 light_pos = omni_lights.data[light_index].position;
+ vec3 light_rel_vec = omni_lights.data[light_index].position - view_pos;
+ float d = length(light_rel_vec);
+ float shadow_attenuation = 1.0;
- vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled);
+ if (d * omni_lights.data[light_index].inv_radius < 1.0) {
+ float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation);
- if (shadow_color_enabled.a > 0.5) {
- //has shadow
- vec4 v = vec4(view_pos, 1.0);
+ vec3 spot_dir = omni_lights.data[light_index].direction;
+ float scos = max(dot(-normalize(light_rel_vec), spot_dir), omni_lights.data[light_index].cone_angle);
+ float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - omni_lights.data[light_index].cone_angle));
+ attenuation *= 1.0 - pow(spot_rim, omni_lights.data[light_index].cone_attenuation);
- vec4 splane = (lights.data[i].shadow_matrix * v);
- splane /= splane.w;
+ vec3 light = omni_lights.data[light_index].color / M_PI;
- float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
- float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
+ if (omni_lights.data[light_index].shadow_enabled) {
+ //has shadow
+ vec4 v = vec4(view_pos, 1.0);
- shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow);
- }
+ vec4 splane = (omni_lights.data[light_index].shadow_matrix * v);
+ splane /= splane.w;
- total_light += light * attenuation * shadow_attenuation;
+ float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
+
+ shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade);
+ }
+
+ total_light += light * attenuation * shadow_attenuation;
+ }
+ }
}
}