diff options
Diffstat (limited to 'servers/rendering/renderer_rd/shaders')
15 files changed, 1824 insertions, 968 deletions
diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index deaa9668df..1b0197c1c1 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -44,3 +44,6 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("particles_copy.glsl") env.RD_GLSL("sort.glsl") env.RD_GLSL("skeleton.glsl") + env.RD_GLSL("cluster_render.glsl") + env.RD_GLSL("cluster_store.glsl") + env.RD_GLSL("cluster_debug.glsl") diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl index 7808e7ed52..3b39edc70e 100644 --- a/servers/rendering/renderer_rd/shaders/canvas.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas.glsl @@ -396,7 +396,7 @@ vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv vec4 shadow_color = unpackUnorm4x8(light_array.data[light_base].shadow_color); #ifdef LIGHT_SHADER_CODE_USED - shadow_color *= shadow_modulate; + shadow_color.rgb *= shadow_modulate; #endif shadow_color.a *= light_color.a; //respect light alpha @@ -497,9 +497,9 @@ void main() { vec2 shadow_vertex = vertex; { - float normal_depth = 1.0; + float normal_map_depth = 1.0; -#if defined(NORMALMAP_USED) +#if defined(NORMAL_MAP_USED) vec3 normal_map = vec3(0.0, 0.0, 1.0); normal_used = true; #endif @@ -510,8 +510,8 @@ FRAGMENT_SHADER_CODE /* clang-format on */ -#if defined(NORMALMAP_USED) - normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_depth); +#if defined(NORMAL_MAP_USED) + normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_map_depth); #endif } @@ -546,7 +546,7 @@ FRAGMENT_SHADER_CODE #ifdef LIGHT_SHADER_CODE_USED vec4 shadow_modulate = vec4(1.0); - light_color = light_compute(light_vertex, direction, normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, color, uv, true); + light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, true); #else if (normal_used) { @@ -563,7 +563,7 @@ FRAGMENT_SHADER_CODE light_color = light_shadow_compute(light_base, light_color, shadow_uv #ifdef LIGHT_SHADER_CODE_USED , - shadow_modulate + shadow_modulate.rgb #endif ); } @@ -605,7 +605,7 @@ FRAGMENT_SHADER_CODE vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height); light_color.rgb *= light_base_color.rgb; - light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, color, uv, false); + light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, false); #else light_color.rgb *= light_base_color.rgb * light_base_color.a; @@ -659,7 +659,7 @@ FRAGMENT_SHADER_CODE light_color = light_shadow_compute(light_base, light_color, shadow_uv #ifdef LIGHT_SHADER_CODE_USED , - shadow_modulate + shadow_modulate.rgb #endif ); } diff --git a/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl index e723468dd8..3a4bf4da07 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_data_inc.glsl @@ -6,12 +6,18 @@ struct LightData { //this structure needs to be as packed as possible vec3 position; float inv_radius; + vec3 direction; float size; - uint attenuation_energy; //attenuation - uint color_specular; //rgb color, a specular (8 bit unorm) - uint cone_attenuation_angle; // attenuation and angle, (16bit float) - uint shadow_color_enabled; //shadow rgb color, a>0.5 enabled (8bit unorm) + + vec3 color; + float attenuation; + + float cone_attenuation; + float cone_angle; + float specular_amount; + bool shadow_enabled; + vec4 atlas_rect; // rect in the shadow atlas mat4 shadow_matrix; float shadow_bias; @@ -34,9 +40,13 @@ struct ReflectionData { float index; vec3 box_offset; uint mask; - vec4 params; // intensity, 0, interior , boxproject vec3 ambient; // ambient color + float intensity; + bool exterior; + bool box_project; uint ambient_mode; + uint pad; + //0-8 is intensity,8-9 is ambient, mode mat4 local_matrix; // up to here for spot and omni, rest is for directional // notes: for ambientblend, use distance to edge to blend between already existing global environment }; diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl new file mode 100644 index 0000000000..70a875192c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl @@ -0,0 +1,115 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32 + vec3(0.14, 0.17, 0.23), + vec3(0.24, 0.44, 0.83), + vec3(0.23, 0.57, 0.84), + vec3(0.22, 0.71, 0.84), + vec3(0.22, 0.85, 0.83), + vec3(0.21, 0.85, 0.72), + vec3(0.21, 0.85, 0.57), + vec3(0.20, 0.85, 0.42), + vec3(0.20, 0.85, 0.27), + vec3(0.27, 0.86, 0.19), + vec3(0.51, 0.85, 0.19), + vec3(0.57, 0.86, 0.19), + vec3(0.62, 0.85, 0.19), + vec3(0.67, 0.86, 0.20), + vec3(0.73, 0.85, 0.20), + vec3(0.78, 0.85, 0.20), + vec3(0.83, 0.85, 0.20), + vec3(0.85, 0.82, 0.20), + vec3(0.85, 0.76, 0.20), + vec3(0.85, 0.81, 0.20), + vec3(0.85, 0.65, 0.20), + vec3(0.84, 0.60, 0.21), + vec3(0.84, 0.56, 0.21), + vec3(0.84, 0.51, 0.21), + vec3(0.84, 0.46, 0.21), + vec3(0.84, 0.41, 0.21), + vec3(0.84, 0.36, 0.21), + vec3(0.84, 0.31, 0.21), + vec3(0.84, 0.27, 0.21), + vec3(0.83, 0.22, 0.22), + vec3(0.83, 0.22, 0.27), + vec3(0.83, 0.22, 0.32), + vec3(1.00, 0.63, 0.70)); +layout(push_constant, binding = 0, std430) uniform Params { + uvec2 screen_size; + uvec2 cluster_screen_size; + + uint cluster_shift; + uint cluster_type; + float z_near; + float z_far; + + bool orthogonal; + uint max_cluster_element_count_div_32; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterData { + uint data[]; +} +cluster_data; + +layout(rgba16f, set = 0, binding = 2) uniform restrict writeonly image2D screen_buffer; +layout(set = 0, binding = 3) uniform texture2D depth_buffer; +layout(set = 0, binding = 4) uniform sampler depth_buffer_sampler; + +void main() { + uvec2 screen_pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(screen_pos, params.screen_size))) { + return; + } + + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + + uint offset = cluster_pos.y * params.cluster_screen_size.x + cluster_pos.x; + offset += params.cluster_screen_size.x * params.cluster_screen_size.y * params.cluster_type; + offset *= (params.max_cluster_element_count_div_32 + 32); + + //depth buffers generally can't be accessed via image API + float depth = texelFetch(sampler2D(depth_buffer, depth_buffer_sampler), ivec2(screen_pos), 0).r * 2.0 - 1.0; + + if (params.orthogonal) { + depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near)); + } + depth /= params.z_far; + + uint slice = uint(clamp(floor(depth * 32.0), 0.0, 31.0)); + uint slice_minmax = cluster_data.data[offset + params.max_cluster_element_count_div_32 + slice]; + uint item_min = slice_minmax & 0xFFFF; + uint item_max = slice_minmax >> 16; + + uint item_count = 0; + for (uint i = 0; i < params.max_cluster_element_count_div_32; i++) { + uint slice_bits = cluster_data.data[offset + i]; + while (slice_bits != 0) { + uint bit = findLSB(slice_bits); + uint item = i * 32 + bit; + if ((item >= item_min && item < item_max)) { + item_count++; + } + slice_bits &= ~(1 << bit); + } + } + + item_count = min(item_count, 32); + + vec3 color = usage_gradient[item_count]; + + color = mix(color * 1.2, color * 0.3, float(slice) / 31.0); + + imageStore(screen_buffer, ivec2(screen_pos), vec4(color, 1.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl new file mode 100644 index 0000000000..8723ea78e4 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -0,0 +1,168 @@ +#[vertex] + +#version 450 + +VERSION_DEFINES + +layout(location = 0) in vec3 vertex_attrib; + +layout(location = 0) out float depth_interp; +layout(location = 1) out flat uint element_index; + +layout(push_constant, binding = 0, std430) uniform Params { + uint base_index; + uint pad0; + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + element_index = params.base_index + gl_InstanceIndex; + + vec3 vertex = vertex_attrib; + vertex *= render_elements.data[element_index].scale; + + vertex = vec4(vertex, 1.0) * render_elements.data[element_index].transform_inv; + depth_interp = -vertex.z; + + gl_Position = state.projection * vec4(vertex, 1.0); +} + +#[fragment] + +#version 450 + +VERSION_DEFINES + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_vote) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable +#extension GL_KHR_shader_subgroup_vote : enable + +#define USE_SUBGROUPS +#endif + +layout(location = 0) in float depth_interp; +layout(location = 1) in flat uint element_index; + +layout(set = 0, binding = 1, std140) uniform State { + mat4 projection; + float inv_z_far; + uint screen_to_clusters_shift; // shift to obtain coordinates in block indices + uint cluster_screen_width; // + uint cluster_data_size; // how much data for a single cluster takes + uint cluster_depth_offset; + uint pad0; + uint pad1; + uint pad2; +} +state; + +//cluster data is layout linearly, each cell contains the follow information: +// - list of bits for every element to mark as used, so (max_elem_count/32)*4 uints +// - a uint for each element to mark the depth bits used when rendering (0-31) + +layout(set = 0, binding = 3, std430) buffer restrict ClusterRender { + uint data[]; +} +cluster_render; + +void main() { + //convert from screen to cluster + uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift; + + //get linear cluster offset from screen poss + uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y; + //multiply by data size to position at the beginning of the element list for this cluster + cluster_offset *= state.cluster_data_size; + + //find the current element in the list and plot the bit to mark it as used + uint usage_write_offset = cluster_offset + (element_index >> 5); + uint usage_write_bit = 1 << (element_index & 0x1F); + +#ifdef USE_SUBGROUPS + + uint cluster_thread_group_index; + + if (!gl_HelperInvocation) { + //http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf + + uvec4 mask; + + while (true) { + // find the cluster offset of the first active thread + // threads that did break; go inactive and no longer count + uint first = subgroupBroadcastFirst(cluster_offset); + // update the mask for thread that match this cluster + mask = subgroupBallot(first == cluster_offset); + if (first == cluster_offset) { + // This thread belongs to the group of threads that match this offset, + // so exit the loop. + break; + } + } + + cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask); + + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + } +#endif + //find the current element in the depth usage list and mark the current depth as used + float unit_depth = depth_interp * state.inv_z_far; + + uint z_bit = clamp(uint(floor(unit_depth * 32.0)), 0, 31); + + uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index; + uint z_write_bit = 1 << z_bit; + +#ifdef USE_SUBGROUPS + if (!gl_HelperInvocation) { + z_write_bit = subgroupOr(z_write_bit); //merge all Zs + if (cluster_thread_group_index == 0) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } + } +#else + if (!gl_HelperInvocation) { + atomicOr(cluster_render.data[z_write_offset], z_write_bit); + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl new file mode 100644 index 0000000000..5be0893c4f --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl @@ -0,0 +1,119 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, binding = 0, std430) uniform Params { + uint cluster_render_data_size; // how much data for a single cluster takes + uint max_render_element_count_div_32; //divided by 32 + uvec2 cluster_screen_size; + uint render_element_count_div_32; //divided by 32 + + uint max_cluster_element_count_div_32; //divided by 32 + uint pad1; + uint pad2; +} +params; + +layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender { + uint data[]; +} +cluster_render; + +layout(set = 0, binding = 2, std430) buffer restrict ClusterStore { + uint data[]; +} +cluster_store; + +struct RenderElement { + uint type; //0-4 + bool touches_near; + bool touches_far; + uint original_index; + mat3x4 transform_inv; + vec3 scale; + uint pad; +}; + +layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements { + RenderElement data[]; +} +render_elements; + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy; + if (any(greaterThanEqual(pos, params.cluster_screen_size))) { + return; + } + + //counter for each type of render_element + + //base offset for this cluster + uint base_offset = (pos.x + params.cluster_screen_size.x * pos.y); + uint src_offset = base_offset * params.cluster_render_data_size; + + uint render_element_offset = 0; + + //check all render_elements and see which one was written to + while (render_element_offset < params.render_element_count_div_32) { + uint bits = cluster_render.data[src_offset + render_element_offset]; + while (bits != 0) { + //if bits exist, check the render_element + uint index_bit = findLSB(bits); + uint index = render_element_offset * 32 + index_bit; + uint type = render_elements.data[index].type; + + uint z_range_offset = src_offset + params.max_render_element_count_div_32 + index; + uint z_range = cluster_render.data[z_range_offset]; + + //if object was written, z was written, but check just in case + if (z_range != 0) { //should always be > 0 + + uint from_z = findLSB(z_range); + uint to_z = findMSB(z_range) + 1; + + if (render_elements.data[index].touches_near) { + from_z = 0; + } + + if (render_elements.data[index].touches_far) { + to_z = 32; + } + + // find cluster offset in the buffer used for indexing in the renderer + uint dst_offset = (base_offset + type * (params.cluster_screen_size.x * params.cluster_screen_size.y)) * (params.max_cluster_element_count_div_32 + 32); + + uint orig_index = render_elements.data[index].original_index; + //store this index in the Z slices by setting the relevant bit + for (uint i = from_z; i < to_z; i++) { + uint slice_ofs = dst_offset + params.max_cluster_element_count_div_32 + i; + + uint minmax = cluster_store.data[slice_ofs]; + + if (minmax == 0) { + minmax = 0xFFFF; //min 0, max 0xFFFF + } + + uint elem_min = min(orig_index, minmax & 0xFFFF); + uint elem_max = max(orig_index + 1, minmax >> 16); //always store plus one, so zero means range is empty when not written to + + minmax = elem_min | (elem_max << 16); + cluster_store.data[slice_ofs] = minmax; + } + + uint store_word = orig_index >> 5; + uint store_bit = orig_index & 0x1F; + + //store the actual render_element index at the end, so the rendering code can reference it + cluster_store.data[dst_offset + store_word] |= 1 << store_bit; + } + + bits &= ~(1 << index_bit); //clear the bit to continue iterating + } + + render_element_offset++; + } +} diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl index 8011dadc72..c2965f9874 100644 --- a/servers/rendering/renderer_rd/shaders/gi.glsl +++ b/servers/rendering/renderer_rd/shaders/gi.glsl @@ -99,7 +99,7 @@ layout(push_constant, binding = 0, std430) uniform Params { uint max_giprobes; bool high_quality_vct; - bool use_sdfgi; + uint pad2; bool orthogonal; vec3 ao_color; @@ -331,7 +331,7 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o } ambient_light.rgb = diffuse; -#if 1 + if (roughness < 0.2) { vec3 pos_to_uvw = 1.0 / sdfgi.grid_size; vec4 light_accum = vec4(0.0); @@ -363,7 +363,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o //ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; } - float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade while (length(ray_pos) < max_distance) { for (uint i = 0; i < sdfgi.max_cascades; i++) { @@ -434,8 +433,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o } } -#endif - reflection_light.rgb = specular; ambient_light.rgb *= sdfgi.energy; @@ -621,11 +618,12 @@ void main() { vec3 reflection = normalize(reflect(normalize(vertex), normal)); - if (params.use_sdfgi) { - sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); - } +#ifdef USE_SDFGI + sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); +#endif - if (params.max_giprobes > 0) { +#ifdef USE_GIPROBES + { uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg; roughness *= roughness; //find arbitrary tangent and bitangent, then build a matrix @@ -656,6 +654,7 @@ void main() { ambient_light = amb_accum; } } +#endif } imageStore(ambient_buffer, pos, ambient_light); diff --git a/servers/rendering/renderer_rd/shaders/giprobe.glsl b/servers/rendering/renderer_rd/shaders/giprobe.glsl index ea4237a45e..4f4753d147 100644 --- a/servers/rendering/renderer_rd/shaders/giprobe.glsl +++ b/servers/rendering/renderer_rd/shaders/giprobe.glsl @@ -208,6 +208,15 @@ float raymarch(float distance, float distance_adv, vec3 from, vec3 direction) { return occlusion; //max(0.0,distance); } +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 light_pos) { if (lights.data[light].type == LIGHT_TYPE_DIRECTIONAL) { light_pos = pos - lights.data[light].direction * length(vec3(params.limits)); @@ -220,7 +229,7 @@ bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 return false; } - attenuation = pow(clamp(1.0 - distance / lights.data[light].radius, 0.0001, 1.0), lights.data[light].attenuation); + attenuation = get_omni_attenuation(distance, 1.0 / lights.data[light].radius, lights.data[light].attenuation); if (lights.data[light].type == LIGHT_TYPE_SPOT) { vec3 rel = normalize(pos - light_pos); diff --git a/servers/rendering/renderer_rd/shaders/particles.glsl b/servers/rendering/renderer_rd/shaders/particles.glsl index 926c7ef9fc..cb6d8dc7f6 100644 --- a/servers/rendering/renderer_rd/shaders/particles.glsl +++ b/servers/rendering/renderer_rd/shaders/particles.glsl @@ -173,7 +173,7 @@ uint hash(uint x) { return x; } -bool emit_particle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) { +bool emit_subparticle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) { if (!params.can_emit) { return false; } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index a7fe86b029..c3e7e2acbf 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -16,7 +16,7 @@ layout(location = 0) in vec3 vertex_attrib; layout(location = 1) in vec3 normal_attrib; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) layout(location = 2) in vec4 tangent_attrib; #endif @@ -76,7 +76,7 @@ layout(location = 3) out vec2 uv_interp; layout(location = 4) out vec2 uv2_interp; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) layout(location = 5) out vec3 tangent_interp; layout(location = 6) out vec3 binormal_interp; #endif @@ -97,8 +97,6 @@ VERTEX_SHADER_GLOBALS invariant gl_Position; -layout(location = 7) flat out uint instance_index; - #ifdef MODE_DUAL_PARABOLOID layout(location = 8) out float dp_clip; @@ -106,22 +104,27 @@ layout(location = 8) out float dp_clip; #endif void main() { - instance_index = draw_call.instance_index; vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif - mat4 world_matrix = instances.data[instance_index].transform; - mat3 world_normal_matrix = mat3(instances.data[instance_index].normal_transform); + mat4 world_matrix = draw_call.transform; - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH)) { + mat3 world_normal_matrix; + if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { + world_normal_matrix = inverse(mat3(world_matrix)); + } else { + world_normal_matrix = mat3(world_matrix); + } + + if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH)) { //multimesh, instances are for it - uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; + uint offset = (draw_call.flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; offset *= gl_InstanceIndex; mat4 matrix; - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { + if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); offset += 2; } else { @@ -129,14 +132,14 @@ void main() { offset += 3; } - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { + if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { #ifdef COLOR_USED color_interp *= transforms.data[offset]; #endif offset += 1; } - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { + if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { instance_custom = transforms.data[offset]; } @@ -144,10 +147,6 @@ void main() { matrix = transpose(matrix); world_matrix = world_matrix * matrix; world_normal_matrix = world_normal_matrix * mat3(matrix); - - } else { - //not a multimesh, instances are for multiple draw calls - instance_index += gl_InstanceIndex; } vec3 vertex = vertex_attrib; @@ -155,14 +154,14 @@ void main() { vec3 normal = normal_attrib * 2.0 - 1.0; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) vec3 tangent = tangent_attrib.xyz * 2.0 - 1.0; float binormalf = tangent_attrib.a * 2.0 - 1.0; vec3 binormal = normalize(cross(normal, tangent) * binormalf); #endif #if 0 - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) { + if (bool(draw_call.flags & INSTANCE_FLAGS_SKELETON)) { //multimesh, instances are for it uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3; @@ -179,7 +178,7 @@ void main() { vertex = (vec4(vertex, 1.0) * m).xyz; normal = (vec4(normal, 0.0) * m).xyz; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) tangent = (vec4(tangent, 0.0) * m).xyz; binormal = (vec4(binormal, 0.0) * m).xyz; @@ -208,7 +207,7 @@ void main() { normal = world_normal_matrix * normal; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) tangent = world_normal_matrix * tangent; binormal = world_normal_matrix * binormal; @@ -239,7 +238,7 @@ VERTEX_SHADER_CODE #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) binormal = modelview_normal * binormal; tangent = modelview_normal * tangent; @@ -251,7 +250,7 @@ VERTEX_SHADER_CODE vertex = (scene_data.inv_camera_matrix * vec4(vertex, 1.0)).xyz; normal = mat3(scene_data.inverse_normal_matrix) * normal; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) binormal = mat3(scene_data.camera_inverse_binormal_matrix) * binormal; tangent = mat3(scene_data.camera_inverse_tangent_matrix) * tangent; @@ -263,7 +262,7 @@ VERTEX_SHADER_CODE normal_interp = normal; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) tangent_interp = tangent; binormal_interp = binormal; #endif @@ -305,7 +304,7 @@ VERTEX_SHADER_CODE #endif #ifdef MODE_RENDER_MATERIAL if (scene_data.material_uv2_mode) { - gl_Position.xy = (uv2_attrib.xy + draw_call.bake_uv2_offset) * 2.0 - 1.0; + gl_Position.xy = (uv2_attrib.xy + draw_call.lightmap_uv_scale.xy) * 2.0 - 1.0; gl_Position.z = 0.00001; gl_Position.w = 1.0; } @@ -340,13 +339,11 @@ layout(location = 3) in vec2 uv_interp; layout(location = 4) in vec2 uv2_interp; #endif -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) layout(location = 5) in vec3 tangent_interp; layout(location = 6) in vec3 binormal_interp; #endif -layout(location = 7) flat in uint instance_index; - #ifdef MODE_DUAL_PARABOLOID layout(location = 8) in float dp_clip; @@ -355,8 +352,7 @@ layout(location = 8) in float dp_clip; //defines to keep compatibility with vertex -#define world_matrix instances.data[instance_index].transform -#define world_normal_matrix instances.data[instance_index].normal_transform +#define world_matrix draw_call.transform #define projection_matrix scene_data.projection_matrix #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) @@ -545,7 +541,7 @@ vec3 F0(float metallic, float specular, vec3 albedo) { return mix(vec3(dielectric), albedo, vec3(metallic)); } -void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float attenuation, vec3 shadow_attenuation, vec3 diffuse_color, float roughness, float metallic, float specular, float specular_blob_intensity, +void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount, #ifdef LIGHT_BACKLIGHT_USED vec3 backlight, #endif @@ -557,7 +553,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte float transmittance_z, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, + float rim, float rim_tint, vec3 rim_color, #endif #ifdef LIGHT_CLEARCOAT_USED float clearcoat, float clearcoat_gloss, @@ -565,6 +561,9 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte #ifdef LIGHT_ANISOTROPY_USED vec3 B, vec3 T, float anisotropy, #endif +#ifdef USE_SOFT_SHADOWS + float A, +#endif #ifdef USE_SHADOW_TO_OPACITY inout float alpha, #endif @@ -574,7 +573,6 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte // light is written by the light shader vec3 normal = N; - vec3 albedo = diffuse_color; vec3 light = L; vec3 view = V; @@ -585,7 +583,12 @@ LIGHT_SHADER_CODE /* clang-format on */ #else + +#ifdef USE_SOFT_SHADOWS float NdotL = min(A + dot(N, L), 1.0); +#else + float NdotL = dot(N, L); +#endif float cNdotL = max(NdotL, 0.0); // clamped NdotL float NdotV = dot(N, V); float cNdotV = max(NdotV, 0.0); @@ -595,14 +598,25 @@ LIGHT_SHADER_CODE #endif #if defined(SPECULAR_BLINN) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#ifdef USE_SOFT_SHADOWS float cNdotH = clamp(A + dot(N, H), 0.0, 1.0); +#else + float cNdotH = clamp(dot(N, H), 0.0, 1.0); +#endif #endif #if defined(DIFFUSE_BURLEY) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#ifdef USE_SOFT_SHADOWS float cLdotH = clamp(A + dot(L, H), 0.0, 1.0); +#else + float cLdotH = clamp(dot(L, H), 0.0, 1.0); +#endif #endif + float metallic = unpackUnorm4x8(orms).z; if (metallic < 1.0) { + float roughness = unpackUnorm4x8(orms).y; + #if defined(DIFFUSE_OREN_NAYAR) vec3 diffuse_brdf_NL; #else @@ -612,23 +626,6 @@ LIGHT_SHADER_CODE #if defined(DIFFUSE_LAMBERT_WRAP) // energy conserving lambert wrap shader diffuse_brdf_NL = max(0.0, (NdotL + roughness) / ((1.0 + roughness) * (1.0 + roughness))); - -#elif defined(DIFFUSE_OREN_NAYAR) - - { - // see http://mimosa-pudica.net/improved-oren-nayar.html - float LdotV = dot(L, V); - - float s = LdotV - NdotL * NdotV; - float t = mix(1.0, max(NdotL, NdotV), step(0.0, s)); - - float sigma2 = roughness * roughness; // TODO: this needs checking - vec3 A = 1.0 + sigma2 * (-0.5 / (sigma2 + 0.33) + 0.17 * diffuse_color / (sigma2 + 0.13)); - float B = 0.45 * sigma2 / (sigma2 + 0.09); - - diffuse_brdf_NL = cNdotL * (A + vec3(B) * s / t) * (1.0 / M_PI); - } - #elif defined(DIFFUSE_TOON) diffuse_brdf_NL = smoothstep(-roughness, max(roughness, 0.01), NdotL); @@ -656,15 +653,15 @@ LIGHT_SHADER_CODE diffuse_brdf_NL = cNdotL * (1.0 / M_PI); #endif - diffuse_light += light_color * diffuse_color * shadow_attenuation * diffuse_brdf_NL * attenuation; + diffuse_light += light_color * diffuse_brdf_NL * attenuation; #if defined(LIGHT_BACKLIGHT_USED) - diffuse_light += light_color * diffuse_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation; + diffuse_light += light_color * (vec3(1.0 / M_PI) - diffuse_brdf_NL) * backlight * attenuation; #endif #if defined(LIGHT_RIM_USED) float rim_light = pow(max(0.0, 1.0 - cNdotV), max(0.0, (1.0 - roughness) * 16.0)); - diffuse_light += rim_light * rim * mix(vec3(1.0), diffuse_color, rim_tint) * light_color; + diffuse_light += rim_light * rim * mix(vec3(1.0), rim_color, rim_tint) * light_color; #endif #ifdef LIGHT_TRANSMITTANCE_USED @@ -682,7 +679,7 @@ LIGHT_SHADER_CODE vec3(0.358, 0.004, 0.0) * exp(dd / 1.99) + vec3(0.078, 0.0, 0.0) * exp(dd / 7.41); - diffuse_light += profile * transmittance_color.a * diffuse_color * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI) * attenuation; + diffuse_light += profile * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI); } #else @@ -692,7 +689,7 @@ LIGHT_SHADER_CODE fade = pow(max(0.0, 1.0 - fade), transmittance_curve); fade *= clamp(transmittance_boost - NdotL, 0.0, 1.0); - diffuse_light += diffuse_color * transmittance_color.rgb * light_color * (1.0 / M_PI) * transmittance_color.a * fade * attenuation; + diffuse_light += transmittance_color.rgb * light_color * (1.0 / M_PI) * transmittance_color.a * fade; } #endif //SSS_MODE_SKIN @@ -700,6 +697,7 @@ LIGHT_SHADER_CODE #endif //LIGHT_TRANSMITTANCE_USED } + float roughness = unpackUnorm4x8(orms).y; if (roughness > 0.0) { // FIXME: roughness == 0 should not disable specular light entirely // D @@ -712,7 +710,7 @@ LIGHT_SHADER_CODE blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); float intensity = blinn; - specular_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation; + specular_light += light_color * intensity * attenuation * specular_amount; #elif defined(SPECULAR_PHONG) @@ -723,7 +721,7 @@ LIGHT_SHADER_CODE phong *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); float intensity = (phong) / max(4.0 * cNdotV * cNdotL, 0.75); - specular_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation; + specular_light += light_color * intensity * attenuation * specular_amount; #elif defined(SPECULAR_TOON) @@ -732,7 +730,7 @@ LIGHT_SHADER_CODE float mid = 1.0 - roughness; mid *= mid; float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid; - diffuse_light += light_color * shadow_attenuation * intensity * specular_blob_intensity * attenuation; // write to diffuse_light, as in toon shading you generally want no reflection + diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection #elif defined(SPECULAR_DISABLED) // none.. @@ -757,13 +755,12 @@ LIGHT_SHADER_CODE float G = G_GGX_2cos(cNdotL, alpha_ggx) * G_GGX_2cos(cNdotV, alpha_ggx); #endif // F - vec3 f0 = F0(metallic, specular, diffuse_color); float cLdotH5 = SchlickFresnel(cLdotH); vec3 F = mix(vec3(cLdotH5), vec3(1.0), f0); vec3 specular_brdf_NL = cNdotL * D * F * G; - specular_light += specular_brdf_NL * light_color * shadow_attenuation * specular_blob_intensity * attenuation; + specular_light += specular_brdf_NL * light_color * attenuation * specular_amount; #endif #if defined(LIGHT_CLEARCOAT_USED) @@ -777,12 +774,12 @@ LIGHT_SHADER_CODE float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL; - specular_light += clearcoat_specular_brdf_NL * light_color * shadow_attenuation * specular_blob_intensity * attenuation; + specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount; #endif } #ifdef USE_SHADOW_TO_OPACITY - alpha = min(alpha, clamp(1.0 - length(shadow_attenuation * attenuation), 0.0, 1.0)); + alpha = min(alpha, clamp(1.0 - attenuation), 0.0, 1.0)); #endif #endif //defined(USE_LIGHT_SHADER_CODE) @@ -895,69 +892,39 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex #endif //USE_NO_SHADOWS -void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 albedo, float roughness, float metallic, float specular, float p_blob_intensity, -#ifdef LIGHT_BACKLIGHT_USED - vec3 backlight, -#endif -#ifdef LIGHT_TRANSMITTANCE_USED - vec4 transmittance_color, - float transmittance_depth, - float transmittance_curve, - float transmittance_boost, -#endif -#ifdef LIGHT_RIM_USED - float rim, float rim_tint, -#endif -#ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_gloss, -#endif -#ifdef LIGHT_ANISOTROPY_USED - vec3 binormal, vec3 tangent, float anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - inout float alpha, -#endif - inout vec3 diffuse_light, inout vec3 specular_light) { - vec3 light_rel_vec = lights.data[idx].position - vertex; - float light_length = length(light_rel_vec); - float normalized_distance = light_length * lights.data[idx].inv_radius; - vec2 attenuation_energy = unpackHalf2x16(lights.data[idx].attenuation_energy); - float omni_attenuation = pow(max(1.0 - normalized_distance, 0.0), attenuation_energy.x); - float light_attenuation = omni_attenuation; - vec3 shadow_attenuation = vec3(1.0); - vec4 color_specular = unpackUnorm4x8(lights.data[idx].color_specular); - color_specular.rgb *= attenuation_energy.y; - float size_A = 0.0; - - if (lights.data[idx].size > 0.0) { - float t = lights.data[idx].size / max(0.001, light_length); - size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); - } - -#ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; //no transmittance by default -#endif +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} +float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { #ifndef USE_NO_SHADOWS - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[idx].shadow_color_enabled); - if (shadow_color_enabled.w > 0.5) { + if (omni_lights.data[idx].shadow_enabled) { // there is a shadowmap + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + vec4 v = vec4(vertex, 1.0); - vec4 splane = (lights.data[idx].shadow_matrix * v); + vec4 splane = (omni_lights.data[idx].shadow_matrix * v); float shadow_len = length(splane.xyz); //need to remember shadow len from here { - vec3 nofs = normal_interp * lights.data[idx].shadow_normal_bias / lights.data[idx].inv_radius; + vec3 nofs = normal_interp * omni_lights.data[idx].shadow_normal_bias / omni_lights.data[idx].inv_radius; nofs *= (1.0 - max(0.0, dot(normalize(light_rel_vec), normalize(normal_interp)))); v.xyz += nofs; - splane = (lights.data[idx].shadow_matrix * v); + splane = (omni_lights.data[idx].shadow_matrix * v); } float shadow; - if (lights.data[idx].soft_shadow_size > 0.0) { +#ifdef USE_SOFT_SHADOWS + if (omni_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker @@ -977,10 +944,10 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); vec3 tangent = normalize(cross(v0, normal)); vec3 bitangent = normalize(cross(tangent, normal)); - float z_norm = shadow_len * lights.data[idx].inv_radius; + float z_norm = shadow_len * omni_lights.data[idx].inv_radius; - tangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale; - bitangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale; + tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; + bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; @@ -988,7 +955,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); - vec4 uv_rect = lights.data[idx].atlas_rect; + vec4 uv_rect = omni_lights.data[idx].atlas_rect; if (pos.z >= 0.0) { pos.z += 1.0; @@ -1016,7 +983,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v tangent *= penumbra; bitangent *= penumbra; - z_norm -= lights.data[idx].inv_radius * lights.data[idx].shadow_bias; + z_norm -= omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias; shadow = 0.0; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { @@ -1024,7 +991,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); - vec4 uv_rect = lights.data[idx].atlas_rect; + vec4 uv_rect = omni_lights.data[idx].atlas_rect; if (pos.z >= 0.0) { pos.z += 1.0; @@ -1047,8 +1014,9 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v shadow = 1.0; } } else { +#endif splane.xyz = normalize(splane.xyz); - vec4 clamp_rect = lights.data[idx].atlas_rect; + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; if (splane.z >= 0.0) { splane.z += 1.0; @@ -1062,101 +1030,149 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v splane.xy /= splane.z; splane.xy = splane.xy * 0.5 + 0.5; - splane.z = (shadow_len - lights.data[idx].shadow_bias) * lights.data[idx].inv_radius; + splane.z = (shadow_len - omni_lights.data[idx].shadow_bias) * omni_lights.data[idx].inv_radius; splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; splane.w = 1.0; //needed? i think it should be 1 already - shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane); + shadow = sample_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane); +#ifdef USE_SOFT_SHADOWS } +#endif + + return shadow; + } +#endif + return 1.0; +} + +void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +#ifdef LIGHT_BACKLIGHT_USED + vec3 backlight, +#endif #ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 clamp_rect = lights.data[idx].atlas_rect; + vec4 transmittance_color, + float transmittance_depth, + float transmittance_curve, + float transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + float rim, float rim_tint, vec3 rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + float clearcoat, float clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + vec3 binormal, vec3 tangent, float anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + inout float alpha, +#endif + inout vec3 diffuse_light, inout vec3 specular_light) { + vec3 light_rel_vec = omni_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation); + float light_attenuation = omni_attenuation; + vec3 color = omni_lights.data[idx].color; - //redo shadowmapping, but shrink the model a bit to avoid arctifacts - splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0)); +#ifdef USE_SOFT_SHADOWS + float size_A = 0.0; - shadow_len = length(splane.xyz); - splane = normalize(splane.xyz); + if (omni_lights.data[idx].size > 0.0) { + float t = omni_lights.data[idx].size / max(0.001, light_length); + size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + } +#endif - if (splane.z >= 0.0) { - splane.z += 1.0; +#ifdef LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; //no transmittance by default + transmittance_color.a *= light_attenuation; + { + vec4 clamp_rect = omni_lights.data[idx].atlas_rect; - } else { - splane.z = 1.0 - splane.z; - } + //redo shadowmapping, but shrink the model a bit to avoid arctifacts + vec4 splane = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * omni_lights.data[idx].transmittance_bias, 1.0)); - splane.xy /= splane.z; - splane.xy = splane.xy * 0.5 + 0.5; - splane.z = shadow_len * lights.data[idx].inv_radius; - splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - splane.w = 1.0; //needed? i think it should be 1 already + shadow_len = length(splane.xyz); + splane = normalize(splane.xyz); + + if (splane.z >= 0.0) { + splane.z += 1.0; - float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; - transmittance_z = (splane.z - shadow_z) / lights.data[idx].inv_radius; + } else { + splane.z = 1.0 - splane.z; } -#endif - vec3 no_shadow = vec3(1.0); + splane.xy /= splane.z; + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * omni_lights.data[idx].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already - if (lights.data[idx].projector_rect != vec4(0.0)) { - vec3 local_v = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; - local_v = normalize(local_v); + float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; + transmittance_z = (splane.z - shadow_z) / omni_lights.data[idx].inv_radius; + } +#endif - vec4 atlas_rect = lights.data[idx].projector_rect; +#if 0 - if (local_v.z >= 0.0) { - local_v.z += 1.0; - atlas_rect.y += atlas_rect.w; + if (omni_lights.data[idx].projector_rect != vec4(0.0)) { + vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; + local_v = normalize(local_v); - } else { - local_v.z = 1.0 - local_v.z; - } + vec4 atlas_rect = omni_lights.data[idx].projector_rect; - local_v.xy /= local_v.z; - local_v.xy = local_v.xy * 0.5 + 0.5; - vec2 proj_uv = local_v.xy * atlas_rect.zw; + if (local_v.z >= 0.0) { + local_v.z += 1.0; + atlas_rect.y += atlas_rect.w; - vec2 proj_uv_ddx; - vec2 proj_uv_ddy; - { - vec3 local_v_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz; - local_v_ddx = normalize(local_v_ddx); + } else { + local_v.z = 1.0 - local_v.z; + } - if (local_v_ddx.z >= 0.0) { - local_v_ddx.z += 1.0; - } else { - local_v_ddx.z = 1.0 - local_v_ddx.z; - } + local_v.xy /= local_v.z; + local_v.xy = local_v.xy * 0.5 + 0.5; + vec2 proj_uv = local_v.xy * atlas_rect.zw; - local_v_ddx.xy /= local_v_ddx.z; - local_v_ddx.xy = local_v_ddx.xy * 0.5 + 0.5; + vec2 proj_uv_ddx; + vec2 proj_uv_ddy; + { + vec3 local_v_ddx = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz; + local_v_ddx = normalize(local_v_ddx); - proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv; + if (local_v_ddx.z >= 0.0) { + local_v_ddx.z += 1.0; + } else { + local_v_ddx.z = 1.0 - local_v_ddx.z; + } - vec3 local_v_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz; - local_v_ddy = normalize(local_v_ddy); + local_v_ddx.xy /= local_v_ddx.z; + local_v_ddx.xy = local_v_ddx.xy * 0.5 + 0.5; - if (local_v_ddy.z >= 0.0) { - local_v_ddy.z += 1.0; - } else { - local_v_ddy.z = 1.0 - local_v_ddy.z; - } + proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv; - local_v_ddy.xy /= local_v_ddy.z; - local_v_ddy.xy = local_v_ddy.xy * 0.5 + 0.5; + vec3 local_v_ddy = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz; + local_v_ddy = normalize(local_v_ddy); - proj_uv_ddy = local_v_ddy.xy * atlas_rect.zw - proj_uv; + if (local_v_ddy.z >= 0.0) { + local_v_ddy.z += 1.0; + } else { + local_v_ddy.z = 1.0 - local_v_ddy.z; } - vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + atlas_rect.xy, proj_uv_ddx, proj_uv_ddy); - no_shadow = mix(no_shadow, proj.rgb, proj.a); + local_v_ddy.xy /= local_v_ddy.z; + local_v_ddy.xy = local_v_ddy.xy * 0.5 + 0.5; + + proj_uv_ddy = local_v_ddy.xy * atlas_rect.zw - proj_uv; } - shadow_attenuation = mix(shadow_color_enabled.rgb, no_shadow, shadow); + vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + atlas_rect.xy, proj_uv_ddx, proj_uv_ddy); + no_shadow = mix(no_shadow, proj.rgb, proj.a); } -#endif //USE_NO_SHADOWS +#endif + + light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color_specular.rgb, light_attenuation, shadow_attenuation, albedo, roughness, metallic, specular, color_specular.a * p_blob_intensity, + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1168,7 +1184,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim * omni_attenuation, rim_tint, + rim * omni_attenuation, rim_tint, rim_color, #endif #ifdef LIGHT_CLEARCOAT_USED clearcoat, clearcoat_gloss, @@ -1176,6 +1192,9 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v #ifdef LIGHT_ANISOTROPY_USED binormal, tangent, anisotropy, #endif +#ifdef USE_SOFT_SHADOWS + size_A, +#endif #ifdef USE_SHADOW_TO_OPACITY alpha, #endif @@ -1183,89 +1202,39 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v specular_light); } -void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 albedo, float roughness, float metallic, float specular, float p_blob_intensity, -#ifdef LIGHT_BACKLIGHT_USED - vec3 backlight, -#endif -#ifdef LIGHT_TRANSMITTANCE_USED - vec4 transmittance_color, - float transmittance_depth, - float transmittance_curve, - float transmittance_boost, -#endif -#ifdef LIGHT_RIM_USED - float rim, float rim_tint, -#endif -#ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_gloss, -#endif -#ifdef LIGHT_ANISOTROPY_USED - vec3 binormal, vec3 tangent, float anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - inout float alpha, -#endif - inout vec3 diffuse_light, - inout vec3 specular_light) { - vec3 light_rel_vec = lights.data[idx].position - vertex; - float light_length = length(light_rel_vec); - float normalized_distance = light_length * lights.data[idx].inv_radius; - vec2 attenuation_energy = unpackHalf2x16(lights.data[idx].attenuation_energy); - float spot_attenuation = pow(max(1.0 - normalized_distance, 0.001), attenuation_energy.x); - vec3 spot_dir = lights.data[idx].direction; - vec2 spot_att_angle = unpackHalf2x16(lights.data[idx].cone_attenuation_angle); - float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y)); - spot_attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x); - float light_attenuation = spot_attenuation; - vec3 shadow_attenuation = vec3(1.0); - vec4 color_specular = unpackUnorm4x8(lights.data[idx].color_specular); - color_specular.rgb *= attenuation_energy.y; - - float size_A = 0.0; - - if (lights.data[idx].size > 0.0) { - float t = lights.data[idx].size / max(0.001, light_length); - size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); - } -/* - if (lights.data[idx].atlas_rect!=vec4(0.0)) { - //use projector texture - } - */ -#ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; -#endif - +float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { #ifndef USE_NO_SHADOWS - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[idx].shadow_color_enabled); - if (shadow_color_enabled.w > 0.5) { + if (spot_lights.data[idx].shadow_enabled) { + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + vec3 spot_dir = spot_lights.data[idx].direction; //there is a shadowmap vec4 v = vec4(vertex, 1.0); - v.xyz -= spot_dir * lights.data[idx].shadow_bias; + v.xyz -= spot_dir * spot_lights.data[idx].shadow_bias; - float z_norm = dot(spot_dir, -light_rel_vec) * lights.data[idx].inv_radius; + float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius; float depth_bias_scale = 1.0 / (max(0.0001, z_norm)); //the closer to the light origin, the more you have to offset to reach 1px in the map - vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * lights.data[idx].shadow_normal_bias * depth_bias_scale; + vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * spot_lights.data[idx].shadow_normal_bias * depth_bias_scale; normal_bias -= spot_dir * dot(spot_dir, normal_bias); //only XY, no Z v.xyz += normal_bias; //adjust with bias - z_norm = dot(spot_dir, v.xyz - lights.data[idx].position) * lights.data[idx].inv_radius; + z_norm = dot(spot_dir, v.xyz - spot_lights.data[idx].position) * spot_lights.data[idx].inv_radius; float shadow; - vec4 splane = (lights.data[idx].shadow_matrix * v); + vec4 splane = (spot_lights.data[idx].shadow_matrix * v); splane /= splane.w; - if (lights.data[idx].soft_shadow_size > 0.0) { +#ifdef USE_SOFT_SHADOWS + if (spot_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker - vec2 shadow_uv = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy; + vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; float blocker_count = 0.0; float blocker_average = 0.0; @@ -1278,11 +1247,11 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); } - float uv_size = lights.data[idx].soft_shadow_size * z_norm * lights.data[idx].soft_shadow_scale; - vec2 clamp_max = lights.data[idx].atlas_rect.xy + lights.data[idx].atlas_rect.zw; + float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale; + vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; - suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max); + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; if (d < z_norm) { blocker_average += d; @@ -1299,7 +1268,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v shadow = 0.0; for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; - suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max); + suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, z_norm, 1.0)); } @@ -1311,54 +1280,93 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v } } else { +#endif //hard shadow - vec4 shadow_uv = vec4(splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy, z_norm, 1.0); + vec4 shadow_uv = vec4(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, z_norm, 1.0); - shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); + shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); +#ifdef USE_SOFT_SHADOWS } +#endif - vec3 no_shadow = vec3(1.0); + return shadow; + } - if (lights.data[idx].projector_rect != vec4(0.0)) { - splane = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)); - splane /= splane.w; +#endif //USE_NO_SHADOWS - vec2 proj_uv = splane.xy * lights.data[idx].projector_rect.zw; + return 1.0; +} - //ensure we have proper mipmaps - vec4 splane_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)); - splane_ddx /= splane_ddx.w; - vec2 proj_uv_ddx = splane_ddx.xy * lights.data[idx].projector_rect.zw - proj_uv; +void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +#ifdef LIGHT_BACKLIGHT_USED + vec3 backlight, +#endif +#ifdef LIGHT_TRANSMITTANCE_USED + vec4 transmittance_color, + float transmittance_depth, + float transmittance_curve, + float transmittance_boost, +#endif +#ifdef LIGHT_RIM_USED + float rim, float rim_tint, vec3 rim_color, +#endif +#ifdef LIGHT_CLEARCOAT_USED + float clearcoat, float clearcoat_gloss, +#endif +#ifdef LIGHT_ANISOTROPY_USED + vec3 binormal, vec3 tangent, float anisotropy, +#endif +#ifdef USE_SHADOW_TO_OPACITY + inout float alpha, +#endif + inout vec3 diffuse_light, + inout vec3 specular_light) { + vec3 light_rel_vec = spot_lights.data[idx].position - vertex; + float light_length = length(light_rel_vec); + float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation); + vec3 spot_dir = spot_lights.data[idx].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[idx].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[idx].cone_angle)); + spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation); + float light_attenuation = spot_attenuation; + vec3 color = spot_lights.data[idx].color; + float specular_amount = spot_lights.data[idx].specular_amount; - vec4 splane_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)); - splane_ddy /= splane_ddy.w; - vec2 proj_uv_ddy = splane_ddy.xy * lights.data[idx].projector_rect.zw - proj_uv; +#ifdef USE_SOFT_SHADOWS + float size_A = 0.0; - vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), proj_uv + lights.data[idx].projector_rect.xy, proj_uv_ddx, proj_uv_ddy); - no_shadow = mix(no_shadow, proj.rgb, proj.a); - } + if (spot_lights.data[idx].size > 0.0) { + float t = spot_lights.data[idx].size / max(0.001, light_length); + size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); + } +#endif - shadow_attenuation = mix(shadow_color_enabled.rgb, no_shadow, shadow); + /* + if (spot_lights.data[idx].atlas_rect!=vec4(0.0)) { + //use projector texture + } + */ #ifdef LIGHT_TRANSMITTANCE_USED - { - splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0)); - splane /= splane.w; - splane.xy = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy; - - float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; - //reconstruct depth - shadow_z /= lights.data[idx].inv_radius; - //distance to light plane - float z = dot(spot_dir, -light_rel_vec); - transmittance_z = z - shadow_z; - } -#endif //LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; + transmittance_color.a *= light_attenuation; + { + splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * spot_lights.data[idx].transmittance_bias, 1.0)); + splane /= splane.w; + splane.xy = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy; + + float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; + //reconstruct depth + shadow_z /= spot_lights.data[idx].inv_radius; + //distance to light plane + float z = dot(spot_dir, -light_rel_vec); + transmittance_z = z - shadow_z; } +#endif //LIGHT_TRANSMITTANCE_USED -#endif //USE_NO_SHADOWS + light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color_specular.rgb, light_attenuation, shadow_attenuation, albedo, roughness, metallic, specular, color_specular.a * p_blob_intensity, + light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1370,7 +1378,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim * spot_attenuation, rim_tint, + rim * spot_attenuation, rim_tint, rim_color, #endif #ifdef LIGHT_CLEARCOAT_USED clearcoat, clearcoat_gloss, @@ -1378,6 +1386,9 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v #ifdef LIGHT_ANISOTROPY_USED binormal, tangent, anisotropy, #endif +#ifdef USE_SOFT_SHADOW + size_A, +#endif #ifdef USE_SHADOW_TO_OPACITY alpha, #endif @@ -1401,11 +1412,11 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes blend *= blend; blend = max(0.0, 1.0 - blend); - if (reflections.data[ref_index].params.x > 0.0) { // compute reflection + if (reflections.data[ref_index].intensity > 0.0) { // compute reflection vec3 local_ref_vec = (reflections.data[ref_index].local_matrix * vec4(ref_vec, 0.0)).xyz; - if (reflections.data[ref_index].params.w > 0.5) { //box project + if (reflections.data[ref_index].box_project) { //box project vec3 nrdir = normalize(local_ref_vec); vec3 rbmax = (box_extents - local_pos) / nrdir; @@ -1422,11 +1433,11 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes reflection.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_ref_vec, reflections.data[ref_index].index), roughness * MAX_ROUGHNESS_LOD).rgb; - if (reflections.data[ref_index].params.z < 0.5) { + if (reflections.data[ref_index].exterior) { reflection.rgb = mix(specular_light, reflection.rgb, blend); } - reflection.rgb *= reflections.data[ref_index].params.x; + reflection.rgb *= reflections.data[ref_index].intensity; //intensity reflection.a = blend; reflection.rgb *= reflection.a; @@ -1445,7 +1456,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes ambient_out.rgb = textureLod(samplerCubeArray(reflection_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(local_amb_vec, reflections.data[ref_index].index), MAX_ROUGHNESS_LOD).rgb; ambient_out.a = blend; - if (reflections.data[ref_index].params.z < 0.5) { //interior + if (reflections.data[ref_index].exterior) { ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); } @@ -1456,7 +1467,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes vec4 ambient_out; ambient_out.a = blend; ambient_out.rgb = reflections.data[ref_index].ambient; - if (reflections.data[ref_index].params.z < 0.5) { + if (reflections.data[ref_index].exterior) { ambient_out.rgb = mix(ambient_light, ambient_out.rgb, blend); } ambient_out.rgb *= ambient_out.a; @@ -1774,7 +1785,43 @@ vec4 fog_process(vec3 vertex) { return vec4(fog_color, fog_amount); } +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + +float blur_shadow(float shadow) { + return shadow; +#if 0 + //disabling for now, will investigate later + float interp_shadow = shadow; + if (gl_HelperInvocation) { + interp_shadow = -4.0; // technically anything below -4 will do but just to make sure + } + + uvec2 fc2 = uvec2(gl_FragCoord.xy); + interp_shadow -= dFdx(interp_shadow) * (float(fc2.x & 1) - 0.5); + interp_shadow -= dFdy(interp_shadow) * (float(fc2.y & 1) - 0.5); + + if (interp_shadow >= 0.0) { + shadow = interp_shadow; + } + return shadow; #endif +} + +#endif //!MODE_RENDER DEPTH void main() { #ifdef MODE_DUAL_PARABOLOID @@ -1802,9 +1849,7 @@ void main() { float clearcoat_gloss = 0.0; float anisotropy = 0.0; vec2 anisotropy_flow = vec2(1.0, 0.0); -#if defined(CUSTOM_FOG_USED) - vec4 custom_fog = vec4(0.0); -#endif + vec4 fog = vec4(0.0); #if defined(CUSTOM_RADIANCE_USED) vec4 custom_radiance = vec4(0.0); #endif @@ -1812,14 +1857,12 @@ void main() { vec4 custom_irradiance = vec4(0.0); #endif -#if defined(AO_USED) float ao = 1.0; float ao_light_affect = 0.0; -#endif float alpha = 1.0; -#if defined(TANGENT_USED) || defined(NORMALMAP_USED) || defined(LIGHT_ANISOTROPY_USED) +#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) vec3 binormal = normalize(binormal_interp); vec3 tangent = normalize(tangent_interp); #else @@ -1850,12 +1893,12 @@ void main() { vec4 color = color_interp; #endif -#if defined(NORMALMAP_USED) +#if defined(NORMAL_MAP_USED) - vec3 normalmap = vec3(0.5); + vec3 normal_map = vec3(0.5); #endif - float normaldepth = 1.0; + float normal_map_depth = 1.0; vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size + scene_data.screen_pixel_size * 0.5; //account for center @@ -1926,12 +1969,12 @@ FRAGMENT_SHADER_CODE #endif // !USE_SHADOW_TO_OPACITY -#ifdef NORMALMAP_USED +#ifdef NORMAL_MAP_USED - normalmap.xy = normalmap.xy * 2.0 - 1.0; - normalmap.z = sqrt(max(0.0, 1.0 - dot(normalmap.xy, normalmap.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. + normal_map.xy = normal_map.xy * 2.0 - 1.0; + normal_map.z = sqrt(max(0.0, 1.0 - dot(normal_map.xy, normal_map.xy))); //always ignore Z, as it can be RG packed, Z may be pos/neg, etc. - normal = normalize(mix(normal, tangent * normalmap.x + binormal * normalmap.y + normal * normalmap.z, normaldepth)); + normal = normalize(mix(normal, tangent * normal_map.x + binormal * normal_map.y + normal * normal_map.z, normal_map_depth)); #endif @@ -1953,77 +1996,147 @@ FRAGMENT_SHADER_CODE discard; } #endif + + /////////////////////// FOG ////////////////////// +#ifndef MODE_RENDER_DEPTH + +#ifndef CUSTOM_FOG_USED + // fog must be processed as early as possible and then packed. + // to maximize VGPR usage + // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. + + if (scene_data.fog_enabled) { + fog = fog_process(vertex); + } + +#ifndef LOW_END_MODE + if (scene_data.volumetric_fog_enabled) { + vec4 volumetric_fog = volumetric_fog_process(screen_uv, -vertex.z); + if (scene_data.fog_enabled) { + //must use the full blending equation here to blend fogs + vec4 res; + float sa = 1.0 - volumetric_fog.a; + res.a = fog.a * sa + volumetric_fog.a; + if (res.a == 0.0) { + res.rgb = vec3(0.0); + } else { + res.rgb = (fog.rgb * fog.a * sa + volumetric_fog.rgb * volumetric_fog.a) / res.a; + } + fog = res; + } else { + fog = volumetric_fog; + } + } +#endif //!LOW_END_MODE +#endif //!CUSTOM_FOG_USED + + uint fog_rg = packHalf2x16(fog.rg); + uint fog_ba = packHalf2x16(fog.ba); + +#endif //!MODE_RENDER_DEPTH + /////////////////////// DECALS //////////////////////////////// #ifndef MODE_RENDER_DEPTH - uvec4 cluster_cell = texture(usampler3D(cluster_texture, material_samplers[SAMPLER_NEAREST_CLAMP]), vec3(screen_uv, (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near))); + uvec2 cluster_pos = uvec2(gl_FragCoord.xy) >> scene_data.cluster_shift; + uint cluster_offset = (scene_data.cluster_width * cluster_pos.y + cluster_pos.x) * (scene_data.max_cluster_element_count_div_32 + 32); + + uint cluster_z = uint(clamp((-vertex.z / scene_data.z_far) * 32.0, 0.0, 31.0)); + //used for interpolating anything cluster related vec3 vertex_ddx = dFdx(vertex); vec3 vertex_ddy = dFdy(vertex); { // process decals - uint decal_count = cluster_cell.w >> CLUSTER_COUNTER_SHIFT; - uint decal_pointer = cluster_cell.w & CLUSTER_POINTER_MASK; + uint cluster_decal_offset = cluster_offset + scene_data.cluster_type_size * 2; - //do outside for performance and avoiding arctifacts + uint item_min; + uint item_max; + uint item_from; + uint item_to; - for (uint i = 0; i < decal_count; i++) { - uint decal_index = cluster_data.indices[decal_pointer + i]; - if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { - continue; //not masked - } + cluster_get_item_range(cluster_decal_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; - if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { - continue; //out of decal - } +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - //we need ddx/ddy for mipmaps, so simulate them - vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz; - vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz; + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_decal_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint decal_index = 32 * i + bit; - if (decals.data[decal_index].normal_fade > 0.0) { - fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5); - } + if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { + continue; //not masked + } - if (decals.data[decal_index].albedo_rect != vec4(0.0)) { - //has albedo - vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); - decal_albedo *= decals.data[decal_index].modulate; - decal_albedo.a *= fade; - albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); - - if (decals.data[decal_index].normal_rect != vec4(0.0)) { - vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; - decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software - decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy))); - //convert to view space, use xzy because y is up - decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; - - normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; + if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { + continue; //out of decal } - if (decals.data[decal_index].orm_rect != vec4(0.0)) { - vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; -#if defined(AO_USED) - ao = mix(ao, decal_orm.r, decal_albedo.a); -#endif - roughness = mix(roughness, decal_orm.g, decal_albedo.a); - metallic = mix(metallic, decal_orm.b, decal_albedo.a); + //we need ddx/ddy for mipmaps, so simulate them + vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz; + vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz; + + float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade); + + if (decals.data[decal_index].normal_fade > 0.0) { + fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5); + } + + if (decals.data[decal_index].albedo_rect != vec4(0.0)) { + //has albedo + vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); + decal_albedo *= decals.data[decal_index].modulate; + decal_albedo.a *= fade; + albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix); + + if (decals.data[decal_index].normal_rect != vec4(0.0)) { + vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; + decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software + decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy))); + //convert to view space, use xzy because y is up + decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz; + + normal = normalize(mix(normal, decal_normal, decal_albedo.a)); + } + + if (decals.data[decal_index].orm_rect != vec4(0.0)) { + vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; + ao = mix(ao, decal_orm.r, decal_albedo.a); + roughness = mix(roughness, decal_orm.g, decal_albedo.a); + metallic = mix(metallic, decal_orm.b, decal_albedo.a); + } } - } - if (decals.data[decal_index].emission_rect != vec4(0.0)) { - //emission is additive, so its independent from albedo - emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + if (decals.data[decal_index].emission_rect != vec4(0.0)) { + //emission is additive, so its independent from albedo + emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + } } } } + //pack albedo until needed again, saves 2 VGPRs in the meantime + #endif //not render depth /////////////////////// LIGHTING ////////////////////////////// @@ -2091,19 +2204,14 @@ FRAGMENT_SHADER_CODE //radiance - float specular_blob_intensity = 1.0; - -#if defined(SPECULAR_TOON) - specular_blob_intensity *= specular * 2.0; -#endif - +/// GI /// #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) #ifdef USE_LIGHTMAP //lightmap - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture - uint index = instances.data[instance_index].gi_offset; + if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture + uint index = draw_call.gi_offset; vec3 wnormal = mat3(scene_data.camera_matrix) * normal; const float c1 = 0.429043; @@ -2122,12 +2230,12 @@ FRAGMENT_SHADER_CODE 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); - } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap - bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); - uint ofs = instances.data[instance_index].gi_offset & 0xFFF; + } else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap + bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); + uint ofs = draw_call.gi_offset & 0xFFFF; vec3 uvw; - uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy; - uvw.z = float((instances.data[instance_index].gi_offset >> 12) & 0xFF); + uvw.xy = uv2 * draw_call.lightmap_uv_scale.zw + draw_call.lightmap_uv_scale.xy; + uvw.z = float((draw_call.gi_offset >> 16) & 0xFFFF); if (uses_sh) { uvw.z *= 4.0; //SH textures use 4 times more data @@ -2136,7 +2244,7 @@ FRAGMENT_SHADER_CODE vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; - uint idx = instances.data[instance_index].gi_offset >> 20; + uint idx = draw_call.gi_offset >> 20; vec3 n = normalize(lightmaps.data[idx].normal_xform * normal); ambient_light += lm_light_l0 * 0.282095f; @@ -2156,7 +2264,7 @@ FRAGMENT_SHADER_CODE } #elif defined(USE_FORWARD_GI) - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + if (bool(draw_call.flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture //make vertex orientation the world one, but still align to camera vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; @@ -2228,9 +2336,9 @@ FRAGMENT_SHADER_CODE } } - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + uint index1 = draw_call.gi_offset & 0xFFFF; vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); //find arbitrary tangent and bitangent, then build a matrix vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); @@ -2242,7 +2350,7 @@ FRAGMENT_SHADER_CODE vec4 spec_accum = vec4(0.0); gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); - uint index2 = instances.data[instance_index].gi_offset >> 16; + uint index2 = draw_call.gi_offset >> 16; if (index2 != 0xFFFF) { gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); @@ -2261,7 +2369,7 @@ FRAGMENT_SHADER_CODE } #elif !defined(LOW_END_MODE) - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers ivec2 coord; @@ -2294,17 +2402,58 @@ FRAGMENT_SHADER_CODE } #endif +#ifndef LOW_END_MODE + if (scene_data.ssao_enabled) { + float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; + ao = min(ao, ssao); + ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); + } +#endif //LOW_END_MODE + { // process reflections vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); - uint reflection_probe_count = cluster_cell.z >> CLUSTER_COUNTER_SHIFT; - uint reflection_probe_pointer = cluster_cell.z & CLUSTER_POINTER_MASK; + uint cluster_reflection_offset = cluster_offset + scene_data.cluster_type_size * 3; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_reflection_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - for (uint i = 0; i < reflection_probe_count; i++) { - uint ref_index = cluster_data.indices[reflection_probe_pointer + i]; - reflection_process(ref_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_reflection_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint reflection_index = 32 * i + bit; + + if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) { + continue; //not masked + } + + reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + } } if (reflection_accum.a > 0.0) { @@ -2318,6 +2467,16 @@ FRAGMENT_SHADER_CODE #endif } + //finalize ambient light here + ambient_light *= albedo.rgb; + ambient_light *= ao; + + // convert ao to direct light ao + ao = mix(1.0, ao, ao_light_affect); + + //this saves some VGPRs + vec3 f0 = F0(metallic, specular, albedo); + { #if defined(DIFFUSE_TOON) //simplify for toon, as @@ -2335,24 +2494,39 @@ FRAGMENT_SHADER_CODE float a004 = min(r.x * r.x, exp2(-9.28 * ndotv)) * r.x + r.y; vec2 env = vec2(-1.04, 1.04) * a004 + r.zw; - vec3 f0 = F0(metallic, specular, albedo); specular_light *= env.x * f0 + env.y; #endif } +#endif //GI !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + +#if !defined(MODE_RENDER_DEPTH) + //this saves some VGPRs + uint orms = packUnorm4x8(vec4(ao, roughness, metallic, specular)); +#endif + +// LIGHTING +#if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) + { //directional light - for (uint i = 0; i < scene_data.directional_light_count; i++) { - if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { - continue; //not masked + // Do shadow and lighting in two passes to reduce register pressure + uint shadow0 = 0; + uint shadow1 = 0; + + for (uint i = 0; i < 8; i++) { + if (i >= scene_data.directional_light_count) { + break; } - vec3 shadow_attenuation = vec3(1.0); + if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + continue; //not masked + } -#ifdef LIGHT_TRANSMITTANCE_USED - float transmittance_z = transmittance_depth; -#endif + float shadow = 1.0; +#ifdef USE_SOFT_SHADOWS + //version with soft shadows, more expensive if (directional_lights.data[i].shadow_enabled) { float depth_z = -vertex.z; @@ -2366,8 +2540,6 @@ FRAGMENT_SHADER_CODE normal_bias -= light_dir * dot(light_dir, normal_bias); \ m_var.xyz += normal_bias; - float shadow = 0.0; - if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); @@ -2388,19 +2560,6 @@ FRAGMENT_SHADER_CODE shadow_color = directional_lights.data[i].shadow_color1.rgb; -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.x; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.x; - - transmittance_z = z - shadow_z; - } -#endif } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); @@ -2420,19 +2579,6 @@ FRAGMENT_SHADER_CODE } shadow_color = directional_lights.data[i].shadow_color2.rgb; -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.y; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.y; - - transmittance_z = z - shadow_z; - } -#endif } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); @@ -2452,19 +2598,6 @@ FRAGMENT_SHADER_CODE } shadow_color = directional_lights.data[i].shadow_color3.rgb; -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.z; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.z; - - transmittance_z = z - shadow_z; - } -#endif } else { vec4 v = vec4(vertex, 1.0); @@ -2485,20 +2618,6 @@ FRAGMENT_SHADER_CODE } shadow_color = directional_lights.data[i].shadow_color4.rgb; - -#ifdef LIGHT_TRANSMITTANCE_USED - { - vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); - vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; - trans_coord /= trans_coord.w; - - float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_z_range.w; - float z = trans_coord.z * directional_lights.data[i].shadow_z_range.w; - - transmittance_z = z - shadow_z; - } -#endif } if (directional_lights.data[i].blend_splits) { @@ -2572,130 +2691,407 @@ FRAGMENT_SHADER_CODE shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance - shadow_attenuation = mix(shadow_color, vec3(1.0), shadow); +#undef BIAS_FUNC + } +#else + // Soft shadow disabled version + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -vertex.z; + + vec4 pssm_coord; + vec3 light_dir = directional_lights.data[i].direction; + vec3 base_normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))); + +#define BIAS_FUNC(m_var, m_idx) \ + m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ + vec3 normal_bias = base_normal_bias * directional_lights.data[i].shadow_normal_bias[m_idx]; \ + normal_bias -= light_dir * dot(light_dir, normal_bias); \ + m_var.xyz += normal_bias; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 0) + + pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x; + + transmittance_z = z - shadow_z; + } +#endif + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 1) + + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y; + + transmittance_z = z - shadow_z; + } +#endif + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 2) + + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z; + + transmittance_z = z - shadow_z; + } +#endif + + } else { + vec4 v = vec4(vertex, 1.0); + + BIAS_FUNC(v, 3) + + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); +#ifdef LIGHT_TRANSMITTANCE_USED + { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w; + + transmittance_z = z - shadow_z; + } +#endif + } + + pssm_coord /= pssm_coord.w; + + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + + if (directional_lights.data[i].blend_splits) { + float pssm_blend; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 1) + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z); + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 2) + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z); + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 v = vec4(vertex, 1.0); + BIAS_FUNC(v, 3) + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z); + } else { + pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) + } + + pssm_coord /= pssm_coord.w; + + float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + shadow = mix(shadow, shadow2, pssm_blend); + } + + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance #undef BIAS_FUNC } +#endif + + if (i < 4) { + shadow0 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << (i * 8); + } else { + shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8); + } + } + + for (uint i = 0; i < 8; i++) { + if (i >= scene_data.directional_light_count) { + break; + } + + if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + continue; //not masked + } - light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].size, directional_lights.data[i].color * directional_lights.data[i].energy, 1.0, shadow_attenuation, albedo, roughness, metallic, specular, directional_lights.data[i].specular * specular_blob_intensity, +#ifdef LIGHT_TRANSMITTANCE_USED + float transmittance_z = transmittance_depth; + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -vertex.z; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.x, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix1 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x; + + transmittance_z = z - shadow_z; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.y, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix2 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y; + + transmittance_z = z - shadow_z; + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.z, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix3 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z; + + transmittance_z = z - shadow_z; + + } else { + vec4 trans_vertex = vec4(vertex - normalize(normal_interp) * directional_lights.data[i].shadow_transmittance_bias.w, 1.0); + vec4 trans_coord = directional_lights.data[i].shadow_matrix4 * trans_vertex; + trans_coord /= trans_coord.w; + + float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; + shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w; + float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w; + + transmittance_z = z - shadow_z; + } +#endif + + float shadow = 1.0; + + if (i < 4) { + shadow = float(shadow0 >> (i * 8) & 0xFF) / 255.0; + } else { + shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0; + } + + blur_shadow(shadow); + + light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, - transmittance_z, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, + transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim, rim_tint, + rim, rim_tint, albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - binormal, tangent, anisotropy, + binormal, tangent, anisotropy, +#endif +#ifdef USE_SOFT_SHADOW + directional_lights.data[i].size, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, - specular_light); + diffuse_light, + specular_light); + } } - } - { //omni lights + { //omni lights - uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT; - uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK; + uint cluster_omni_offset = cluster_offset; - for (uint i = 0; i < omni_light_count; i++) { - uint light_index = cluster_data.indices[omni_light_pointer + i]; + uint item_min; + uint item_max; + uint item_from; + uint item_to; - if (!bool(lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { - continue; //not masked - } + cluster_get_item_range(cluster_omni_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, albedo, roughness, metallic, specular, specular_blob_intensity, +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; + + if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + continue; //not masked + } + + float shadow = light_process_omni_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); + + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, #endif #ifdef LIGHT_RIM_USED - rim, - rim_tint, + rim, + rim_tint, + albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, + tangent, binormal, anisotropy, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, specular_light); + diffuse_light, specular_light); + } + } } - } - { //spot lights - uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT; - uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK; + { //spot lights - for (uint i = 0; i < spot_light_count; i++) { - uint light_index = cluster_data.indices[spot_light_pointer + i]; + uint cluster_spot_offset = cluster_offset + scene_data.cluster_type_size; - if (!bool(lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { - continue; //not masked - } + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_spot_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, albedo, roughness, metallic, specular, specular_blob_intensity, +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + + uint light_index = 32 * i + bit; + + if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) { + continue; //not masked + } + + float shadow = light_process_spot_shadow(light_index, vertex, view); + + shadow = blur_shadow(shadow); + + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, #ifdef LIGHT_BACKLIGHT_USED - backlight, + backlight, #endif #ifdef LIGHT_TRANSMITTANCE_USED - transmittance_color, - transmittance_depth, - transmittance_curve, - transmittance_boost, + transmittance_color, + transmittance_depth, + transmittance_curve, + transmittance_boost, #endif #ifdef LIGHT_RIM_USED - rim, - rim_tint, + rim, + rim_tint, + albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_gloss, #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, + tangent, binormal, anisotropy, #endif #ifdef USE_SHADOW_TO_OPACITY - alpha, + alpha, #endif - diffuse_light, specular_light); + diffuse_light, specular_light); + } + } } - } #ifdef USE_SHADOW_TO_OPACITY - alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); + alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); #if defined(ALPHA_SCISSOR_USED) - if (alpha < alpha_scissor) { - discard; - } + if (alpha < alpha_scissor) { + discard; + } #endif // ALPHA_SCISSOR_USED #ifdef USE_OPAQUE_PREPASS - if (alpha < opaque_prepass_threshold) { - discard; - } + if (alpha < opaque_prepass_threshold) { + discard; + } #endif // USE_OPAQUE_PREPASS @@ -2707,173 +3103,149 @@ FRAGMENT_SHADER_CODE #ifdef MODE_RENDER_SDF - { - vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz; - ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size)); - - uint albedo16 = 0x1; //solid flag - albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11; - albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6; - albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1; - - imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16)); - - uint facing_bits = 0; - const vec3 aniso_dir[6] = vec3[]( - vec3(1, 0, 0), - vec3(0, 1, 0), - vec3(0, 0, 1), - vec3(-1, 0, 0), - vec3(0, -1, 0), - vec3(0, 0, -1)); - - vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp); - - float closest_dist = -1e20; - - for (uint i = 0; i < 6; i++) { - float d = dot(cam_normal, aniso_dir[i]); - if (d > closest_dist) { - closest_dist = d; - facing_bits = (1 << i); + { + vec3 local_pos = (scene_data.sdf_to_bounds * vec4(vertex, 1.0)).xyz; + ivec3 grid_pos = scene_data.sdf_offset + ivec3(local_pos * vec3(scene_data.sdf_size)); + + uint albedo16 = 0x1; //solid flag + albedo16 |= clamp(uint(albedo.r * 31.0), 0, 31) << 11; + albedo16 |= clamp(uint(albedo.g * 31.0), 0, 31) << 6; + albedo16 |= clamp(uint(albedo.b * 31.0), 0, 31) << 1; + + imageStore(albedo_volume_grid, grid_pos, uvec4(albedo16)); + + uint facing_bits = 0; + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp); + + float closest_dist = -1e20; + + for (uint i = 0; i < 6; i++) { + float d = dot(cam_normal, aniso_dir[i]); + if (d > closest_dist) { + closest_dist = d; + facing_bits = (1 << i); + } } - } - imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits + imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits - if (length(emission) > 0.001) { - float lumas[6]; - vec3 light_total = vec3(0); + if (length(emission) > 0.001) { + float lumas[6]; + vec3 light_total = vec3(0); - for (int i = 0; i < 6; i++) { - float strength = max(0.0, dot(cam_normal, aniso_dir[i])); - vec3 light = emission * strength; - light_total += light; - lumas[i] = max(light.r, max(light.g, light.b)); - } + for (int i = 0; i < 6; i++) { + float strength = max(0.0, dot(cam_normal, aniso_dir[i])); + vec3 light = emission * strength; + light_total += light; + lumas[i] = max(light.r, max(light.g, light.b)); + } - float luma_total = max(light_total.r, max(light_total.g, light_total.b)); + float luma_total = max(light_total.r, max(light_total.g, light_total.b)); - uint light_aniso = 0; + uint light_aniso = 0; - for (int i = 0; i < 6; i++) { - light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); - } + for (int i = 0; i < 6; i++) { + light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); + } - //compress to RGBE9995 to save space + //compress to RGBE9995 to save space - const float pow2to9 = 512.0f; - const float B = 15.0f; - const float N = 9.0f; - const float LN2 = 0.6931471805599453094172321215; + const float pow2to9 = 512.0f; + const float B = 15.0f; + const float N = 9.0f; + const float LN2 = 0.6931471805599453094172321215; - float cRed = clamp(light_total.r, 0.0, 65408.0); - float cGreen = clamp(light_total.g, 0.0, 65408.0); - float cBlue = clamp(light_total.b, 0.0, 65408.0); + float cRed = clamp(light_total.r, 0.0, 65408.0); + float cGreen = clamp(light_total.g, 0.0, 65408.0); + float cBlue = clamp(light_total.b, 0.0, 65408.0); - float cMax = max(cRed, max(cGreen, cBlue)); + float cMax = max(cRed, max(cGreen, cBlue)); - float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; - float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); - float exps = expp + 1.0f; + float exps = expp + 1.0f; - if (0.0 <= sMax && sMax < pow2to9) { - exps = expp; - } + if (0.0 <= sMax && sMax < pow2to9) { + exps = expp; + } - float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); - float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); - float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); - //store as 8985 to have 2 extra neighbour bits - uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); + float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); + float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); + float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); + //store as 8985 to have 2 extra neighbour bits + uint light_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); - imageStore(emission_grid, grid_pos, uvec4(light_rgbe)); - imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso)); + imageStore(emission_grid, grid_pos, uvec4(light_rgbe)); + imageStore(emission_aniso_grid, grid_pos, uvec4(light_aniso)); + } } - } #endif #ifdef MODE_RENDER_MATERIAL - albedo_output_buffer.rgb = albedo; - albedo_output_buffer.a = alpha; + albedo_output_buffer.rgb = albedo; + albedo_output_buffer.a = alpha; - normal_output_buffer.rgb = normal * 0.5 + 0.5; - normal_output_buffer.a = 0.0; - depth_output_buffer.r = -vertex.z; + normal_output_buffer.rgb = normal * 0.5 + 0.5; + normal_output_buffer.a = 0.0; + depth_output_buffer.r = -vertex.z; -#if defined(AO_USED) - orm_output_buffer.r = ao; -#else - orm_output_buffer.r = 0.0; -#endif - orm_output_buffer.g = roughness; - orm_output_buffer.b = metallic; - orm_output_buffer.a = sss_strength; + orm_output_buffer.r = ao; + orm_output_buffer.g = roughness; + orm_output_buffer.b = metallic; + orm_output_buffer.a = sss_strength; - emission_output_buffer.rgb = emission; - emission_output_buffer.a = 0.0; + emission_output_buffer.rgb = emission; + emission_output_buffer.a = 0.0; #endif #ifdef MODE_RENDER_NORMAL_ROUGHNESS - normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); + normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); #ifdef MODE_RENDER_GIPROBE - if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; - uint index2 = instances.data[instance_index].gi_offset >> 16; - giprobe_buffer.x = index1 & 0xFF; - giprobe_buffer.y = index2 & 0xFF; - } else { - giprobe_buffer.x = 0xFF; - giprobe_buffer.y = 0xFF; - } + if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + uint index1 = draw_call.gi_offset & 0xFFFF; + uint index2 = draw_call.gi_offset >> 16; + giprobe_buffer.x = index1 & 0xFF; + giprobe_buffer.y = index2 & 0xFF; + } else { + giprobe_buffer.x = 0xFF; + giprobe_buffer.y = 0xFF; + } #endif -#endif //MODE_RENDER_NORMAL +#endif //MODE_RENDER_NORMAL_ROUGHNESS //nothing happens, so a tree-ssa optimizer will result in no fragment shader :) #else - specular_light *= scene_data.reflection_multiplier; - ambient_light *= albedo; //ambient must be multiplied by albedo at the end - -//ambient occlusion -#if defined(AO_USED) - -#ifndef LOW_END_MODE - if (scene_data.ssao_enabled && scene_data.ssao_ao_affect > 0.0) { - float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; - ao = mix(ao, min(ao, ssao), scene_data.ssao_ao_affect); - ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); - } -#endif //LOW_END_MODE - - ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao); - ao_light_affect = mix(1.0, ao, ao_light_affect); - specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect); - diffuse_light = mix(scene_data.ao_color.rgb, diffuse_light, ao_light_affect); -#else - -#ifndef LOW_END_MODE - if (scene_data.ssao_enabled) { - float ao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; - ambient_light = mix(scene_data.ao_color.rgb, ambient_light, ao); - float ao_light_affect = mix(1.0, ao, scene_data.ssao_light_affect); - specular_light = mix(scene_data.ao_color.rgb, specular_light, ao_light_affect); - diffuse_light = mix(scene_data.ao_color.rgb, diffuse_light, ao_light_affect); - } -#endif //LOW_END_MODE + // multiply by albedo + diffuse_light *= albedo; // ambient must be multiplied by albedo at the end -#endif // AO_USED + // apply direct light AO + ao = unpackUnorm4x8(orms).x; + specular_light *= ao; + diffuse_light *= ao; - // base color remapping - diffuse_light *= 1.0 - metallic; // TODO: avoid all diffuse and ambient light calculations when metallic == 1 up to this point + // apply metallic + metallic = unpackUnorm4x8(orms).z; + diffuse_light *= 1.0 - metallic; ambient_light *= 1.0 - metallic; + //restore fog + fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba)); + #ifdef MODE_MULTIPLE_RENDER_TARGETS #ifdef MODE_UNSHADED @@ -2889,25 +3261,8 @@ FRAGMENT_SHADER_CODE specular_buffer = vec4(specular_light, metallic); #endif - // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. - if (scene_data.fog_enabled) { - vec4 fog = fog_process(vertex); - diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); - specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); - } - -#ifndef LOW_END_MODE - if (scene_data.volumetric_fog_enabled) { - vec4 fog = volumetric_fog_process(screen_uv, -vertex.z); - diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); - specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); - } -#endif // LOW_END_MODE - -#if defined(CUSTOM_FOG_USED) - diffuse_buffer.rgb = mix(diffuse_buffer.rgb, custom_fog.rgb, custom_fog.a); - specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), custom_fog.a); -#endif //CUSTOM_FOG_USED + diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); + specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); #else //MODE_MULTIPLE_RENDER_TARGETS @@ -2919,22 +3274,9 @@ FRAGMENT_SHADER_CODE #endif //USE_NO_SHADING // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. - if (scene_data.fog_enabled) { - vec4 fog = fog_process(vertex); - frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); - } -#ifndef LOW_END_MODE - if (scene_data.volumetric_fog_enabled) { - vec4 fog = volumetric_fog_process(screen_uv, -vertex.z); - frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); - } -#endif - -#if defined(CUSTOM_FOG_USED) - frag_color.rgb = mix(frag_color.rgb, custom_fog.rgb, custom_fog.a); -#endif //CUSTOM_FOG_USED + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); #endif //MODE_MULTIPLE_RENDER_TARGETS #endif //MODE_RENDER_DEPTH -} + } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index fdc9941bba..a37e32e1fc 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -3,18 +3,30 @@ #define MAX_GI_PROBES 8 +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +#define USE_SUBGROUPS + +#endif + #include "cluster_data_inc.glsl" -#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMALMAP_USED) +#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) #ifndef NORMAL_USED #define NORMAL_USED #endif #endif layout(push_constant, binding = 0, std430) uniform DrawCall { - uint instance_index; - uint pad; //16 bits minimum size - vec2 bake_uv2_offset; //used for bake to uv2, ignored otherwise + mat4 transform; + uint flags; + uint instance_uniforms_ofs; //base offset in global buffer for instance variables + uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint layer_mask; + vec4 lightmap_uv_scale; } draw_call; @@ -49,6 +61,11 @@ layout(set = 0, binding = 3, std140) uniform SceneData { vec2 viewport_size; vec2 screen_pixel_size; + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint max_cluster_element_count_div_32; + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted vec4 directional_penumbra_shadow_kernel[32]; vec4 directional_soft_shadow_kernel[32]; @@ -134,33 +151,24 @@ scene_data; #define INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK 0x7 #define INSTANCE_FLAGS_SKELETON (1 << 19) +#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) -struct InstanceData { - mat4 transform; - mat4 normal_transform; - uint flags; - uint instance_uniforms_ofs; //base offset in global buffer for instance variables - uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint layer_mask; - vec4 lightmap_uv_scale; -}; - -layout(set = 0, binding = 4, std430) restrict readonly buffer Instances { - InstanceData data[]; +layout(set = 0, binding = 5, std430) restrict readonly buffer OmniLights { + LightData data[]; } -instances; +omni_lights; -layout(set = 0, binding = 5, std430) restrict readonly buffer Lights { +layout(set = 0, binding = 6, std430) restrict readonly buffer SpotLights { LightData data[]; } -lights; +spot_lights; -layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { +layout(set = 0, binding = 7) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -layout(set = 0, binding = 7, std140) uniform DirectionalLights { +layout(set = 0, binding = 8, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; @@ -177,35 +185,26 @@ layout(set = 0, binding = 10, std140) restrict readonly buffer Lightmaps { } lightmaps; -layout(set = 0, binding = 11) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; - struct LightmapCapture { vec4 sh[9]; }; -layout(set = 0, binding = 12, std140) restrict readonly buffer LightmapCaptures { +layout(set = 0, binding = 11, std140) restrict readonly buffer LightmapCaptures { LightmapCapture data[]; } lightmap_captures; -layout(set = 0, binding = 13) uniform texture2D decal_atlas; -layout(set = 0, binding = 14) uniform texture2D decal_atlas_srgb; +layout(set = 0, binding = 12) uniform texture2D decal_atlas; +layout(set = 0, binding = 13) uniform texture2D decal_atlas_srgb; -layout(set = 0, binding = 15, std430) restrict readonly buffer Decals { +layout(set = 0, binding = 14, std430) restrict readonly buffer Decals { DecalData data[]; } decals; -layout(set = 0, binding = 16) uniform utexture3D cluster_texture; - -layout(set = 0, binding = 17, std430) restrict readonly buffer ClusterData { - uint indices[]; -} -cluster_data; - -layout(set = 0, binding = 18) uniform texture2D directional_shadow_atlas; +layout(set = 0, binding = 15) uniform texture2D directional_shadow_atlas; -layout(set = 0, binding = 19, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 16, std430) restrict readonly buffer GlobalVariableData { vec4 data[]; } global_variables; @@ -219,7 +218,7 @@ struct SDFGIProbeCascadeData { float to_cell; // 1/bounds * grid_size }; -layout(set = 0, binding = 20, std140) uniform SDFGI { +layout(set = 0, binding = 17, std140) uniform SDFGI { vec3 grid_size; uint max_cascades; @@ -269,18 +268,25 @@ layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas; layout(set = 1, binding = 2) uniform texture2D shadow_atlas; +layout(set = 1, binding = 3) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; + #ifndef LOW_END_MODE -layout(set = 1, binding = 3) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +layout(set = 1, binding = 4) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; #endif +layout(set = 1, binding = 5, std430) buffer restrict readonly ClusterBuffer { + uint data[]; +} +cluster_buffer; + /* Set 3, Render Buffers */ #ifdef MODE_RENDER_SDF -layout(r16ui, set = 1, binding = 4) uniform restrict writeonly uimage3D albedo_volume_grid; -layout(r32ui, set = 1, binding = 5) uniform restrict writeonly uimage3D emission_grid; -layout(r32ui, set = 1, binding = 6) uniform restrict writeonly uimage3D emission_aniso_grid; -layout(r32ui, set = 1, binding = 7) uniform restrict uimage3D geom_facing_grid; +layout(r16ui, set = 1, binding = 6) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 7) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 9) uniform restrict uimage3D geom_facing_grid; //still need to be present for shaders that use it, so remap them to something #define depth_buffer shadow_atlas @@ -289,17 +295,17 @@ layout(r32ui, set = 1, binding = 7) uniform restrict uimage3D geom_facing_grid; #else -layout(set = 1, binding = 4) uniform texture2D depth_buffer; -layout(set = 1, binding = 5) uniform texture2D color_buffer; +layout(set = 1, binding = 6) uniform texture2D depth_buffer; +layout(set = 1, binding = 7) uniform texture2D color_buffer; #ifndef LOW_END_MODE -layout(set = 1, binding = 6) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 7) uniform texture2D ao_buffer; -layout(set = 1, binding = 8) uniform texture2D ambient_buffer; -layout(set = 1, binding = 9) uniform texture2D reflection_buffer; -layout(set = 1, binding = 10) uniform texture2DArray sdfgi_lightprobe_texture; -layout(set = 1, binding = 11) uniform texture3D sdfgi_occlusion_cascades; +layout(set = 1, binding = 8) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 9) uniform texture2D ao_buffer; +layout(set = 1, binding = 10) uniform texture2D ambient_buffer; +layout(set = 1, binding = 11) uniform texture2D reflection_buffer; +layout(set = 1, binding = 12) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 13) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { mat4 xform; @@ -317,12 +323,12 @@ struct GIProbeData { uint mipmaps; }; -layout(set = 1, binding = 12, std140) uniform GIProbes { +layout(set = 1, binding = 14, std140) uniform GIProbes { GIProbeData data[MAX_GI_PROBES]; } gi_probes; -layout(set = 1, binding = 13) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 15) uniform texture3D volumetric_fog_texture; #endif // LOW_END_MODE diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl index 61e4bf5e18..ed0a8a4b86 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl @@ -112,6 +112,15 @@ vec2 octahedron_encode(vec3 n) { return n.xy; } +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + void main() { uint voxel_index = uint(gl_GlobalInvocationID.x); @@ -134,10 +143,78 @@ void main() { uint voxel_albedo = process_voxels.data[voxel_index].albedo; vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F); - vec3 light_accum[6]; - + vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); uint valid_aniso = (voxel_albedo >> 15) & 0x3F; + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + // Add indirect light first, in order to save computation resources +#ifdef MODE_PROCESS_DYNAMIC + if (params.multibounce) { + vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; + ivec3 probe_base_pos = ivec3(pos); + + float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); + tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); + + tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); + + vec3 base_tex_posf = vec3(tex_pos); + vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); + vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + // Compute lightprobe texture position + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + + for (uint k = 0; k < 6; k++) { + if (bool(valid_aniso & (1 << k))) { + vec3 n = aniso_dir[k]; + float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); + + vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); + tex_posf.xy *= tex_pixel_size; + + vec3 pos_uvw = tex_posf; + pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; + pos_uvw.x += float(offset.z) * probe_uv_offset.z; + vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + light_accum[k] += indirect_light * weight; + weight_accum[k] += weight; + } + } + } + + for (uint k = 0; k < 6; k++) { + if (weight_accum[k] > 0.0) { + light_accum[k] /= weight_accum[k]; + light_accum[k] *= albedo; + } + } + } + +#endif + { uint rgbe = process_voxels.data[voxel_index].light; @@ -153,18 +230,10 @@ void main() { uint aniso = process_voxels.data[voxel_index].light_aniso; for (uint i = 0; i < 6; i++) { float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F); - light_accum[i] = l * strength; + light_accum[i] += l * strength; } } - const vec3 aniso_dir[6] = vec3[]( - vec3(1, 0, 0), - vec3(0, 1, 0), - vec3(0, 0, 1), - vec3(-1, 0, 0), - vec3(0, -1, 0), - vec3(0, 0, -1)); - // Raytrace light vec3 pos_to_uvw = 1.0 / params.grid_size; @@ -184,14 +253,15 @@ void main() { direction = normalize(rel_vec); light_distance = length(rel_vec); rel_vec.y /= params.y_mult; - attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation); + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); + } break; case LIGHT_TYPE_SPOT: { vec3 rel_vec = lights.data[i].position - position; direction = normalize(rel_vec); light_distance = length(rel_vec); rel_vec.y /= params.y_mult; - attenuation = pow(clamp(1.0 - length(rel_vec) / lights.data[i].radius, 0.0, 1.0), lights.data[i].attenuation); + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); float angle = acos(dot(normalize(rel_vec), -lights.data[i].direction)); if (angle > lights.data[i].spot_angle) { @@ -282,65 +352,6 @@ void main() { } } - // Add indirect light - - if (params.multibounce) { - vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; - ivec3 probe_base_pos = ivec3(pos); - - vec4 probe_accum[6] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); - float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); - - ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); - tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); - - tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); - - vec3 base_tex_posf = vec3(tex_pos); - vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); - vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; - - for (uint j = 0; j < 8; j++) { - ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); - ivec3 probe_posi = probe_base_pos; - probe_posi += offset; - - // Compute weight - - vec3 probe_pos = vec3(probe_posi); - vec3 probe_to_pos = pos - probe_pos; - vec3 probe_dir = normalize(-probe_to_pos); - - // Compute lightprobe texture position - - vec3 trilinear = vec3(1.0) - abs(probe_to_pos); - - for (uint k = 0; k < 6; k++) { - if (bool(valid_aniso & (1 << k))) { - vec3 n = aniso_dir[k]; - float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); - - vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); - tex_posf.xy *= tex_pixel_size; - - vec3 pos_uvw = tex_posf; - pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; - pos_uvw.x += float(offset.z) * probe_uv_offset.z; - vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0); - - probe_accum[k] += indirect_light * weight; - weight_accum[k] += weight; - } - } - } - - for (uint k = 0; k < 6; k++) { - if (weight_accum[k] > 0.0) { - light_accum[k] += probe_accum[k].rgb * albedo / weight_accum[k]; - } - } - } - // Store the light in the light texture float lumas[6]; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl index d516ab22c3..67630a3aa1 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl @@ -136,12 +136,24 @@ uint rgbe_encode(vec3 color) { return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); } +struct SH { +#if (SH_SIZE == 16) + float c[48]; +#else + float c[28]; +#endif +}; + +shared SH sh_accum[64]; //8x8 + void main() { ivec2 pos = ivec2(gl_GlobalInvocationID.xy); if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing return; } + uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8; + #ifdef MODE_PROCESS float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; @@ -154,27 +166,9 @@ void main() { vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; vec3 pos_to_uvw = 1.0 / params.grid_size; - vec4 probe_sh_accum[SH_SIZE] = vec4[]( - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0) -#if (SH_SIZE == 16) - , - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0), - vec4(0.0) -#endif - ); + for (uint i = 0; i < SH_SIZE * 3; i++) { + sh_accum[probe_index].c[i] = 0.0; + } // quickly ensure each probe has a different "offset" for the vogel function, based on integer world position uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell)); @@ -278,33 +272,33 @@ void main() { } vec3 ray_dir2 = ray_dir * ray_dir; - float c[SH_SIZE] = float[]( - - 0.282095, //l0 - 0.488603 * ray_dir.y, //l1n1 - 0.488603 * ray_dir.z, //l1n0 - 0.488603 * ray_dir.x, //l1p1 - 1.092548 * ray_dir.x * ray_dir.y, //l2n2 - 1.092548 * ray_dir.y * ray_dir.z, //l2n1 - 0.315392 * (3.0 * ray_dir2.z - 1.0), //l20 - 1.092548 * ray_dir.x * ray_dir.z, //l2p1 - 0.546274 * (ray_dir2.x - ray_dir2.y) //l2p2 + +#define SH_ACCUM(m_idx, m_value) \ + { \ + vec3 l = light.rgb * (m_value); \ + sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \ + sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \ + sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \ + } + SH_ACCUM(0, 0.282095); //l0 + SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1 + SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0 + SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1 + SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2 + SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1 + SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20 + SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1 + SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2 #if (SH_SIZE == 16) - , - 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y), - 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z, - 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z), - 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z), - 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z), - 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z, - 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y) + SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y)); + SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z); + SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z)); + SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z); + SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)); #endif - ); - - for (uint j = 0; j < SH_SIZE; j++) { - probe_sh_accum[j] += light * c[j]; - } } for (uint i = 0; i < SH_SIZE; i++) { @@ -312,7 +306,7 @@ void main() { ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index)); ivec2 average_pos = prev_pos.xy; - vec4 value = probe_sh_accum[i] * 4.0 / float(params.ray_count); + vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count); ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average @@ -344,37 +338,11 @@ void main() { ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1); ivec2 local_pos = pos % OCT_SIZE; - //fill the spherical harmonic - vec4 sh[SH_SIZE]; - - for (uint i = 0; i < SH_SIZE; i++) { - // store in history texture - ivec2 average_pos = sh_pos + ivec2(0, i); - ivec4 average = imageLoad(lightprobe_average_texture, average_pos); - - sh[i] = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); - } - //compute the octahedral normal for this texel vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE)); - /* + // read the spherical harmonic - const float c1 = 0.429043; - const float c2 = 0.511664; - const float c3 = 0.743125; - const float c4 = 0.886227; - const float c5 = 0.247708; - vec4 light = (c1 * sh[8] * (normal.x * normal.x - normal.y * normal.y) + - c3 * sh[6] * normal.z * normal.z + - c4 * sh[0] - - c5 * sh[6] + - 2.0 * c1 * sh[4] * normal.x * normal.y + - 2.0 * c1 * sh[7] * normal.x * normal.z + - 2.0 * c1 * sh[5] * normal.y * normal.z + - 2.0 * c2 * sh[3] * normal.x + - 2.0 * c2 * sh[1] * normal.y + - 2.0 * c2 * sh[2] * normal.z); -*/ + vec3 normal2 = normal * normal; float c[SH_SIZE] = float[]( @@ -426,7 +394,14 @@ void main() { vec3 radiance = vec3(0.0); for (uint i = 0; i < SH_SIZE; i++) { - vec3 m = sh[i].rgb * c[i] * 4.0; + // store in history texture + ivec2 average_pos = sh_pos + ivec2(0, i); + ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + + vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + + vec3 m = sh.rgb * c[i] * 4.0; + irradiance += m * l_mult[i]; radiance += m; } diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/ssao.glsl index f67965ab49..231f8f91ec 100644 --- a/servers/rendering/renderer_rd/shaders/ssao.glsl +++ b/servers/rendering/renderer_rd/shaders/ssao.glsl @@ -88,7 +88,7 @@ counter; layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; // This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, binding = 3, std430) uniform Params { ivec2 screen_size; int pass; int quality; @@ -249,7 +249,6 @@ void SSAOTap(const int p_quality_level, inout float r_obscurance_sum, inout floa SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); } -// this function is designed to only work with half/half depth at the moment - there's a couple of hardcoded paths that expect pixel/texel size, so it will not work for full res void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) { vec2 pos_rounded = trunc(p_pos); uvec2 upos = uvec2(pos_rounded); @@ -257,8 +256,8 @@ void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, o const int number_of_taps = (p_adaptive_base) ? (SSAO_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]); float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z; - vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass)); // g_ViewspaceDepthSource.GatherRed(g_PointMirrorSampler, pos_rounded * params.half_screen_pixel_size); - vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass)); // g_ViewspaceDepthSource.GatherRed(g_PointMirrorSampler, pos_rounded * params.half_screen_pixel_size, ivec2(1, 1)); + vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass)); + vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass)); // get this pixel's viewspace depth pix_z = valuesUL.y; @@ -276,8 +275,7 @@ void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, o uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy; vec3 pixel_normal = load_normal(ivec2(full_res_coord)); - //const vec2 pixel_size_at_center = pix_center_pos.z * params.NDC_to_view_mul * params.half_screen_pixel_size; // optimized approximation of: - vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size * 0.5, pix_center_pos.z).xy - pix_center_pos.xy; + const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy; float pixel_lookup_radius; float fallof_sq; @@ -440,9 +438,6 @@ void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, o fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0); } - // same as a bove, but a lot more conservative version - // fade_out *= clamp( dot( edgesLRTB, vec4( 0.9, 0.9, 0.9, 0.9 ) ) - 2.6 , 0.0, 1.0); - // strength obscurance = params.intensity * obscurance; diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl index 13b162f0c9..aa32809a06 100644 --- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl +++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl @@ -4,6 +4,15 @@ VERSION_DEFINES +/* Do not use subgroups here, seems there is not much advantage and causes glitches +#extension GL_KHR_shader_subgroup_ballot: enable +#extension GL_KHR_shader_subgroup_arithmetic: enable + +#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) +#define USE_SUBGROUPS +#endif +*/ + #if defined(MODE_FOG) || defined(MODE_FILTER) layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; @@ -23,22 +32,25 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; layout(set = 0, binding = 1) uniform texture2D shadow_atlas; layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas; -layout(set = 0, binding = 3, std430) restrict readonly buffer Lights { +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { LightData data[]; } -lights; +omni_lights; -layout(set = 0, binding = 4, std140) uniform DirectionalLights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { + LightData data[]; +} +spot_lights; + +layout(set = 0, binding = 5, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; -layout(set = 0, binding = 5) uniform utexture3D cluster_texture; - -layout(set = 0, binding = 6, std430) restrict readonly buffer ClusterData { - uint indices[]; +layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer { + uint data[]; } -cluster_data; +cluster_buffer; layout(set = 0, binding = 7) uniform sampler linear_sampler; @@ -132,7 +144,7 @@ layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture; #endif //SDFGI -layout(push_constant, binding = 0, std430) uniform Params { +layout(set = 0, binding = 14, std140) uniform Params { vec2 fog_frustum_size_begin; vec2 fog_frustum_size_end; @@ -150,7 +162,14 @@ layout(push_constant, binding = 0, std430) uniform Params { float detail_spread; float gi_inject; uint max_gi_probes; - uint pad; + uint cluster_type_size; + + vec2 screen_size; + uint cluster_shift; + uint cluster_width; + + uvec3 cluster_pad; + uint max_cluster_element_count_div_32; mat3x4 cam_rotation; } @@ -169,6 +188,31 @@ vec3 hash3f(uvec3 x) { return vec3(x & 0xFFFFF) / vec3(float(0xFFFFF)); } +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + ; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + void main() { vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); @@ -184,6 +228,12 @@ void main() { //posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0; vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels + + uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); vec3 view_pos; @@ -191,6 +241,8 @@ void main() { view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; view_pos.y = -view_pos.y; + uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0)); + vec3 total_light = params.light_color; float total_density = params.base_density; @@ -257,108 +309,160 @@ void main() { //compute lights from cluster - vec3 cluster_pos; - cluster_pos.xy = fog_unit_pos.xy; - cluster_pos.z = clamp((abs(view_pos.z) - params.z_near) / (params.z_far - params.z_near), 0.0, 1.0); + { //omni lights - uvec4 cluster_cell = texture(usampler3D(cluster_texture, linear_sampler), cluster_pos); + uint cluster_omni_offset = cluster_offset; - uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT; - uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK; + uint item_min; + uint item_max; + uint item_from; + uint item_to; - for (uint i = 0; i < omni_light_count; i++) { - uint light_index = cluster_data.indices[omni_light_pointer + i]; + cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - vec3 light_pos = lights.data[i].position; - float d = distance(lights.data[i].position, view_pos) * lights.data[i].inv_radius; - vec3 shadow_attenuation = vec3(1.0); +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif - if (d < 1.0) { - vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); - vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular); + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; - float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x); + //if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} - vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; + vec3 light_pos = omni_lights.data[light_index].position; + float d = distance(omni_lights.data[light_index].position, view_pos); + float shadow_attenuation = 1.0; - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled); + if (d * omni_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); - if (shadow_color_enabled.a > 0.5) { - //has shadow - vec4 v = vec4(view_pos, 1.0); + vec3 light = omni_lights.data[light_index].color / M_PI; - vec4 splane = (lights.data[i].shadow_matrix * v); - float shadow_len = length(splane.xyz); //need to remember shadow len from here + if (omni_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); - splane.xyz = normalize(splane.xyz); - vec4 clamp_rect = lights.data[i].atlas_rect; + vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); + float shadow_len = length(splane.xyz); //need to remember shadow len from here - if (splane.z >= 0.0) { - splane.z += 1.0; + splane.xyz = normalize(splane.xyz); + vec4 clamp_rect = omni_lights.data[light_index].atlas_rect; - clamp_rect.y += clamp_rect.w; + if (splane.z >= 0.0) { + splane.z += 1.0; - } else { - splane.z = 1.0 - splane.z; - } + clamp_rect.y += clamp_rect.w; + + } else { + splane.z = 1.0 - splane.z; + } - splane.xy /= splane.z; + splane.xy /= splane.z; - splane.xy = splane.xy * 0.5 + 0.5; - splane.z = shadow_len * lights.data[i].inv_radius; - splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - splane.w = 1.0; //needed? i think it should be 1 already + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * omni_lights.data[light_index].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + } + total_light += light * attenuation * shadow_attenuation; + } } - total_light += light * attenuation * shadow_attenuation; } } - uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT; - uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK; + { //spot lights - for (uint i = 0; i < spot_light_count; i++) { - uint light_index = cluster_data.indices[spot_light_pointer + i]; + uint cluster_spot_offset = cluster_offset + params.cluster_type_size; - vec3 light_pos = lights.data[i].position; - vec3 light_rel_vec = lights.data[i].position - view_pos; - float d = length(light_rel_vec) * lights.data[i].inv_radius; - vec3 shadow_attenuation = vec3(1.0); + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif - if (d < 1.0) { - vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); - vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular); + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif - float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x); + //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} - vec3 spot_dir = lights.data[i].direction; - vec2 spot_att_angle = unpackHalf2x16(lights.data[i].cone_attenuation_angle); - float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y)); - attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x); + uint light_index = 32 * i + bit; - vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; + vec3 light_pos = omni_lights.data[light_index].position; + vec3 light_rel_vec = omni_lights.data[light_index].position - view_pos; + float d = length(light_rel_vec); + float shadow_attenuation = 1.0; - vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled); + if (d * omni_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); - if (shadow_color_enabled.a > 0.5) { - //has shadow - vec4 v = vec4(view_pos, 1.0); + vec3 spot_dir = omni_lights.data[light_index].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), omni_lights.data[light_index].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - omni_lights.data[light_index].cone_angle)); + attenuation *= 1.0 - pow(spot_rim, omni_lights.data[light_index].cone_attenuation); - vec4 splane = (lights.data[i].shadow_matrix * v); - splane /= splane.w; + vec3 light = omni_lights.data[light_index].color / M_PI; - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); + if (omni_lights.data[light_index].shadow_enabled) { + //has shadow + vec4 v = vec4(view_pos, 1.0); - shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); - } + vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); + splane /= splane.w; - total_light += light * attenuation * shadow_attenuation; + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + } + + total_light += light * attenuation * shadow_attenuation; + } + } } } |