diff options
Diffstat (limited to 'servers/rendering/renderer_rd/shaders')
3 files changed, 255 insertions, 176 deletions
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index ea203c8abe..4286ef462c 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -89,12 +89,6 @@ MATERIAL_UNIFORMS } material; #endif -/* clang-format off */ - -VERTEX_SHADER_GLOBALS - -/* clang-format on */ - invariant gl_Position; #ifdef MODE_DUAL_PARABOLOID @@ -103,28 +97,43 @@ layout(location = 8) out float dp_clip; #endif +layout(location = 9) out flat uint instance_index; + +/* clang-format off */ + +VERTEX_SHADER_GLOBALS + +/* clang-format on */ + void main() { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif - mat4 world_matrix = draw_call.transform; + uint instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + + mat4 world_matrix = instances.data[instance_index].transform; mat3 world_normal_matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { world_normal_matrix = inverse(mat3(world_matrix)); } else { world_normal_matrix = mat3(world_matrix); } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH)) { + if (is_multimesh) { //multimesh, instances are for it - uint offset = (draw_call.flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; + uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; offset *= gl_InstanceIndex; mat4 matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); offset += 2; } else { @@ -132,14 +141,14 @@ void main() { offset += 3; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { #ifdef COLOR_USED color_interp *= transforms.data[offset]; #endif offset += 1; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { instance_custom = transforms.data[offset]; } @@ -161,7 +170,7 @@ void main() { #endif #if 0 - if (bool(draw_call.flags & INSTANCE_FLAGS_SKELETON)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) { //multimesh, instances are for it uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3; @@ -304,7 +313,8 @@ VERTEX_SHADER_CODE #endif #ifdef MODE_RENDER_MATERIAL if (scene_data.material_uv2_mode) { - gl_Position.xy = (uv2_attrib.xy + draw_call.lightmap_uv_scale.xy) * 2.0 - 1.0; + vec2 uv_offset = unpackHalf2x16(draw_call.uv_offset); + gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0; gl_Position.z = 0.00001; gl_Position.w = 1.0; } @@ -350,9 +360,11 @@ layout(location = 8) in float dp_clip; #endif +layout(location = 9) in flat uint instance_index; + //defines to keep compatibility with vertex -#define world_matrix draw_call.transform +#define world_matrix instances.data[instance_index].transform #define projection_matrix scene_data.projection_matrix #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) @@ -1770,7 +1782,7 @@ vec4 fog_process(vec3 vertex) { } } - float fog_amount = 1.0 - exp(vertex.z * scene_data.fog_density); + float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); if (abs(scene_data.fog_height_density) > 0.001) { float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; @@ -2083,7 +2095,7 @@ FRAGMENT_SHADER_CODE #endif uint decal_index = 32 * i + bit; - if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { + if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2210,8 +2222,8 @@ FRAGMENT_SHADER_CODE #ifdef USE_LIGHTMAP //lightmap - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture - uint index = draw_call.gi_offset; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture + uint index = instances.data[instance_index].gi_offset; vec3 wnormal = mat3(scene_data.camera_matrix) * normal; const float c1 = 0.429043; @@ -2230,12 +2242,12 @@ FRAGMENT_SHADER_CODE 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); - } else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap - bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); - uint ofs = draw_call.gi_offset & 0xFFFF; + } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap + bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); + uint ofs = instances.data[instance_index].gi_offset & 0xFFFF; vec3 uvw; - uvw.xy = uv2 * draw_call.lightmap_uv_scale.zw + draw_call.lightmap_uv_scale.xy; - uvw.z = float((draw_call.gi_offset >> 16) & 0xFFFF); + uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy; + uvw.z = float((instances.data[instance_index].gi_offset >> 16) & 0xFFFF); if (uses_sh) { uvw.z *= 4.0; //SH textures use 4 times more data @@ -2244,7 +2256,7 @@ FRAGMENT_SHADER_CODE vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; - uint idx = draw_call.gi_offset >> 20; + uint idx = instances.data[instance_index].gi_offset >> 20; vec3 n = normalize(lightmaps.data[idx].normal_xform * normal); ambient_light += lm_light_l0 * 0.282095f; @@ -2264,7 +2276,7 @@ FRAGMENT_SHADER_CODE } #elif defined(USE_FORWARD_GI) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture //make vertex orientation the world one, but still align to camera vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; @@ -2336,9 +2348,9 @@ FRAGMENT_SHADER_CODE } } - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); //find arbitrary tangent and bitangent, then build a matrix vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); @@ -2350,7 +2362,7 @@ FRAGMENT_SHADER_CODE vec4 spec_accum = vec4(0.0); gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); - uint index2 = draw_call.gi_offset >> 16; + uint index2 = instances.data[instance_index].gi_offset >> 16; if (index2 != 0xFFFF) { gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); @@ -2369,7 +2381,7 @@ FRAGMENT_SHADER_CODE } #elif !defined(LOW_END_MODE) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers vec2 coord; @@ -2448,7 +2460,7 @@ FRAGMENT_SHADER_CODE #endif uint reflection_index = 32 * i + bit; - if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) { + if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2519,7 +2531,7 @@ FRAGMENT_SHADER_CODE break; } - if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2838,7 +2850,7 @@ FRAGMENT_SHADER_CODE break; } - if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2968,7 +2980,7 @@ FRAGMENT_SHADER_CODE #endif uint light_index = 32 * i + bit; - if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3041,7 +3053,7 @@ FRAGMENT_SHADER_CODE uint light_index = 32 * i + bit; - if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(spot_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3214,9 +3226,9 @@ FRAGMENT_SHADER_CODE normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); #ifdef MODE_RENDER_GIPROBE - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; - uint index2 = draw_call.gi_offset >> 16; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + uint index2 = instances.data[instance_index].gi_offset >> 16; giprobe_buffer.x = index1 & 0xFF; giprobe_buffer.y = index2 & 0xFF; } else { @@ -3275,6 +3287,7 @@ FRAGMENT_SHADER_CODE // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + ; #endif //MODE_MULTIPLE_RENDER_TARGETS diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index e9b79e1560..d78890fa9e 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -21,12 +21,10 @@ #endif layout(push_constant, binding = 0, std430) uniform DrawCall { - mat4 transform; - uint flags; - uint instance_uniforms_ofs; //base offset in global buffer for instance variables - uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint layer_mask; - vec4 lightmap_uv_scale; + uint instance_index; + uint uv_offset; + uint pad0; + uint pad1; } draw_call; @@ -45,96 +43,13 @@ draw_call; #define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 #define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 -layout(set = 0, binding = 1) uniform sampler material_samplers[12]; - -layout(set = 0, binding = 2) uniform sampler shadow_sampler; - #define SDFGI_MAX_CASCADES 8 -layout(set = 0, binding = 3, std140) uniform SceneData { - mat4 projection_matrix; - mat4 inv_projection_matrix; - - mat4 camera_matrix; - mat4 inv_camera_matrix; - - vec2 viewport_size; - vec2 screen_pixel_size; - - uint cluster_shift; - uint cluster_width; - uint cluster_type_size; - uint max_cluster_element_count_div_32; - - //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted - vec4 directional_penumbra_shadow_kernel[32]; - vec4 directional_soft_shadow_kernel[32]; - vec4 penumbra_shadow_kernel[32]; - vec4 soft_shadow_kernel[32]; - - uint directional_penumbra_shadow_samples; - uint directional_soft_shadow_samples; - uint penumbra_shadow_samples; - uint soft_shadow_samples; - - vec4 ambient_light_color_energy; +/* Set 1: Base Pass (never changes) */ - float ambient_color_sky_mix; - bool use_ambient_light; - bool use_ambient_cubemap; - bool use_reflection_cubemap; - - mat3 radiance_inverse_xform; - - vec2 shadow_atlas_pixel_size; - vec2 directional_shadow_pixel_size; - - uint directional_light_count; - float dual_paraboloid_side; - float z_far; - float z_near; - - bool ssao_enabled; - float ssao_light_affect; - float ssao_ao_affect; - bool roughness_limiter_enabled; - - float roughness_limiter_amount; - float roughness_limiter_limit; - uvec2 roughness_limiter_pad; - - vec4 ao_color; - - mat4 sdf_to_bounds; - - ivec3 sdf_offset; - bool material_uv2_mode; - - ivec3 sdf_size; - bool gi_upscale_for_msaa; - - bool volumetric_fog_enabled; - float volumetric_fog_inv_length; - float volumetric_fog_detail_spread; - uint volumetric_fog_pad; - - bool fog_enabled; - float fog_density; - float fog_height; - float fog_height_density; - - vec3 fog_light_color; - float fog_sun_scatter; - - float fog_aerial_perspective; - - float time; - float reflection_multiplier; // one normally, zero when rendering reflections - - bool pancake_shadows; -} +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; -scene_data; +layout(set = 0, binding = 2) uniform sampler shadow_sampler; #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) #define INSTANCE_FLAGS_USE_SDFGI (1 << 7) @@ -153,22 +68,22 @@ scene_data; #define INSTANCE_FLAGS_SKELETON (1 << 19) #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) -layout(set = 0, binding = 4, std430) restrict readonly buffer OmniLights { +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { LightData data[]; } omni_lights; -layout(set = 0, binding = 5, std430) restrict readonly buffer SpotLights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { LightData data[]; } spot_lights; -layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { +layout(set = 0, binding = 5) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -layout(set = 0, binding = 7, std140) uniform DirectionalLights { +layout(set = 0, binding = 6, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; @@ -180,7 +95,7 @@ struct Lightmap { mat3 normal_xform; }; -layout(set = 0, binding = 8, std140) restrict readonly buffer Lightmaps { +layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps { Lightmap data[]; } lightmaps; @@ -189,20 +104,20 @@ struct LightmapCapture { vec4 sh[9]; }; -layout(set = 0, binding = 9, std140) restrict readonly buffer LightmapCaptures { +layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { LightmapCapture data[]; } lightmap_captures; -layout(set = 0, binding = 10) uniform texture2D decal_atlas; -layout(set = 0, binding = 11) uniform texture2D decal_atlas_srgb; +layout(set = 0, binding = 9) uniform texture2D decal_atlas; +layout(set = 0, binding = 10) uniform texture2D decal_atlas_srgb; -layout(set = 0, binding = 12, std430) restrict readonly buffer Decals { +layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { DecalData data[]; } decals; -layout(set = 0, binding = 13, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableData { vec4 data[]; } global_variables; @@ -216,7 +131,7 @@ struct SDFGIProbeCascadeData { float to_cell; // 1/bounds * grid_size }; -layout(set = 0, binding = 14, std140) uniform SDFGI { +layout(set = 0, binding = 13, std140) uniform SDFGI { vec3 grid_size; uint max_cascades; @@ -246,47 +161,140 @@ sdfgi; #endif //LOW_END_MODE -// decal atlas +/* Set 2: Render Pass (changes per render pass) */ -/* Set 1, Radiance */ +layout(set = 1, binding = 0, std140) uniform SceneData { + mat4 projection_matrix; + mat4 inv_projection_matrix; + + mat4 camera_matrix; + mat4 inv_camera_matrix; + + vec2 viewport_size; + vec2 screen_pixel_size; + + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint max_cluster_element_count_div_32; + + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted + vec4 directional_penumbra_shadow_kernel[32]; + vec4 directional_soft_shadow_kernel[32]; + vec4 penumbra_shadow_kernel[32]; + vec4 soft_shadow_kernel[32]; + + uint directional_penumbra_shadow_samples; + uint directional_soft_shadow_samples; + uint penumbra_shadow_samples; + uint soft_shadow_samples; + + vec4 ambient_light_color_energy; + + float ambient_color_sky_mix; + bool use_ambient_light; + bool use_ambient_cubemap; + bool use_reflection_cubemap; + + mat3 radiance_inverse_xform; + + vec2 shadow_atlas_pixel_size; + vec2 directional_shadow_pixel_size; + + uint directional_light_count; + float dual_paraboloid_side; + float z_far; + float z_near; + + bool ssao_enabled; + float ssao_light_affect; + float ssao_ao_affect; + bool roughness_limiter_enabled; + + float roughness_limiter_amount; + float roughness_limiter_limit; + uvec2 roughness_limiter_pad; + + vec4 ao_color; + + mat4 sdf_to_bounds; + + ivec3 sdf_offset; + bool material_uv2_mode; + + ivec3 sdf_size; + bool gi_upscale_for_msaa; + + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; + + bool fog_enabled; + float fog_density; + float fog_height; + float fog_height_density; + + vec3 fog_light_color; + float fog_sun_scatter; + + float fog_aerial_perspective; + + float time; + float reflection_multiplier; // one normally, zero when rendering reflections + + bool pancake_shadows; +} + +scene_data; + +struct InstanceData { + mat4 transform; + uint flags; + uint instance_uniforms_ofs; //base offset in global buffer for instance variables + uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint layer_mask; + vec4 lightmap_uv_scale; +}; + +layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer { + InstanceData data[]; +} +instances; #ifdef USE_RADIANCE_CUBEMAP_ARRAY -layout(set = 1, binding = 0) uniform textureCubeArray radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap; #else -layout(set = 1, binding = 0) uniform textureCube radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCube radiance_cubemap; #endif -/* Set 2, Reflection and Shadow Atlases (view dependent) */ - -layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas; +layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas; -layout(set = 1, binding = 2) uniform texture2D shadow_atlas; +layout(set = 1, binding = 4) uniform texture2D shadow_atlas; -layout(set = 1, binding = 3) uniform texture2D directional_shadow_atlas; +layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas; -layout(set = 1, binding = 4) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; +layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; -#ifndef LOW_END_MODE -layout(set = 1, binding = 5) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +#ifndef LOW_END_MOD +layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; #endif -layout(set = 1, binding = 6, std430) buffer restrict readonly ClusterBuffer { +layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer { uint data[]; } cluster_buffer; -/* Set 3, Render Buffers */ - #ifdef MODE_RENDER_SDF -layout(r16ui, set = 1, binding = 7) uniform restrict writeonly uimage3D albedo_volume_grid; -layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_grid; -layout(r32ui, set = 1, binding = 9) uniform restrict writeonly uimage3D emission_aniso_grid; -layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; +layout(r16ui, set = 1, binding = 9) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 10) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 11) uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; //still need to be present for shaders that use it, so remap them to something #define depth_buffer shadow_atlas @@ -295,17 +303,17 @@ layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; #else -layout(set = 1, binding = 7) uniform texture2D depth_buffer; -layout(set = 1, binding = 8) uniform texture2D color_buffer; +layout(set = 1, binding = 9) uniform texture2D depth_buffer; +layout(set = 1, binding = 10) uniform texture2D color_buffer; #ifndef LOW_END_MODE -layout(set = 1, binding = 9) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 10) uniform texture2D ao_buffer; -layout(set = 1, binding = 11) uniform texture2D ambient_buffer; -layout(set = 1, binding = 12) uniform texture2D reflection_buffer; -layout(set = 1, binding = 13) uniform texture2DArray sdfgi_lightprobe_texture; -layout(set = 1, binding = 14) uniform texture3D sdfgi_occlusion_cascades; +layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 12) uniform texture2D ao_buffer; +layout(set = 1, binding = 13) uniform texture2D ambient_buffer; +layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { mat4 xform; @@ -323,22 +331,22 @@ struct GIProbeData { uint mipmaps; }; -layout(set = 1, binding = 15, std140) uniform GIProbes { +layout(set = 1, binding = 17, std140) uniform GIProbes { GIProbeData data[MAX_GI_PROBES]; } gi_probes; -layout(set = 1, binding = 16) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture; #endif // LOW_END_MODE #endif -/* Set 4 Skeleton & Instancing (Multimesh) */ +/* Set 2 Skeleton & Instancing (can change per item) */ layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { vec4 data[]; } transforms; -/* Set 5 User Material */ +/* Set 3 User Material */ diff --git a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl index 29443ae7db..a29b24e560 100644 --- a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl +++ b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl @@ -6,8 +6,20 @@ VERSION_DEFINES #define BLOCK_SIZE 8 +#ifdef MODE_REDUCE_SUBGROUP + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +//nvidia friendly, max 32 +layout(local_size_x = 8, local_size_y = 4, local_size_z = 1) in; + +#else + layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; +#endif + #ifdef MODE_REDUCE shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; @@ -16,8 +28,12 @@ const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3); #endif -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_depth; -layout(r32f, set = 0, binding = 1) uniform restrict writeonly image2D dst_depth; +#if defined(MODE_REDUCE) || defined(MODE_REDUCE_SUBGROUP) +layout(set = 0, binding = 0) uniform sampler2D source_depth; +#else +layout(r16, set = 0, binding = 0) uniform restrict readonly image2D source_depth; +#endif +layout(r16, set = 1, binding = 0) uniform restrict writeonly image2D dst_depth; layout(push_constant, binding = 1, std430) uniform Params { ivec2 source_size; @@ -29,6 +45,48 @@ layout(push_constant, binding = 1, std430) uniform Params { params; void main() { +#ifdef MODE_REDUCE_SUBGROUP + + uvec2 local_pos = gl_LocalInvocationID.xy; + ivec2 image_offset = params.source_offset; + ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy * ivec2(1, 2)); + + float depth = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; + depth += texelFetch(source_depth, min(image_pos + ivec2(0, 1), params.source_size - ivec2(1)), 0).r; + depth *= 0.5; + +#ifdef MODE_REDUCE_8 + //fast version, reduce all + float depth_average = subgroupAdd(depth) / 32.0; + if (local_pos == uvec2(0)) { + imageStore(dst_depth, image_pos / 8, vec4(depth_average)); + } +#else + //bit slower version, reduce by regions + uint group_size = (8 / params.min_size); + uvec2 group_id = local_pos / (8 / params.min_size); + + uvec4 mask; + float depth_average = 0; + + while (true) { + uvec2 first = subgroupBroadcastFirst(group_id); + mask = subgroupBallot(first == group_id); + if (first == group_id) { + depth_average = subgroupAdd(depth); + break; + } + } + + depth_average /= float(group_size * group_size); + + if (local_pos == group_id) { + imageStore(dst_depth, image_pos / int(group_size), vec4(depth_average)); + } +#endif + +#endif + #ifdef MODE_REDUCE uvec2 pos = gl_LocalInvocationID.xy; @@ -36,7 +94,7 @@ void main() { ivec2 image_offset = params.source_offset; ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy); uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x]; - tmp_data[dst_t] = imageLoad(source_depth, min(image_pos, params.source_size - ivec2(1))).r; + tmp_data[dst_t] = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; ivec2 image_size = params.source_size; uint t = pos.y * BLOCK_SIZE + pos.x; |