diff options
Diffstat (limited to 'servers/rendering/rasterizer_rd/shaders')
14 files changed, 1491 insertions, 162 deletions
diff --git a/servers/rendering/rasterizer_rd/shaders/SCsub b/servers/rendering/rasterizer_rd/shaders/SCsub index 67f4edc626..9d531d63ad 100644 --- a/servers/rendering/rasterizer_rd/shaders/SCsub +++ b/servers/rendering/rasterizer_rd/shaders/SCsub @@ -35,3 +35,8 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("sdfgi_direct_light.glsl") env.RD_GLSL("sdfgi_debug.glsl") env.RD_GLSL("sdfgi_debug_probes.glsl") + env.RD_GLSL("volumetric_fog.glsl") + env.RD_GLSL("shadow_reduce.glsl") + env.RD_GLSL("particles.glsl") + env.RD_GLSL("particles_copy.glsl") + env.RD_GLSL("sort.glsl") diff --git a/servers/rendering/rasterizer_rd/shaders/cluster_data_inc.glsl b/servers/rendering/rasterizer_rd/shaders/cluster_data_inc.glsl new file mode 100644 index 0000000000..e723468dd8 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/cluster_data_inc.glsl @@ -0,0 +1,95 @@ + +#define CLUSTER_COUNTER_SHIFT 20 +#define CLUSTER_POINTER_MASK ((1 << CLUSTER_COUNTER_SHIFT) - 1) +#define CLUSTER_COUNTER_MASK 0xfff + +struct LightData { //this structure needs to be as packed as possible + vec3 position; + float inv_radius; + vec3 direction; + float size; + uint attenuation_energy; //attenuation + uint color_specular; //rgb color, a specular (8 bit unorm) + uint cone_attenuation_angle; // attenuation and angle, (16bit float) + uint shadow_color_enabled; //shadow rgb color, a>0.5 enabled (8bit unorm) + vec4 atlas_rect; // rect in the shadow atlas + mat4 shadow_matrix; + float shadow_bias; + float shadow_normal_bias; + float transmittance_bias; + float soft_shadow_size; // for spot, it's the size in uv coordinates of the light, for omni it's the span angle + float soft_shadow_scale; // scales the shadow kernel for blurrier shadows + uint mask; + float shadow_volumetric_fog_fade; + uint pad; + vec4 projector_rect; //projector rect in srgb decal atlas +}; + +#define REFLECTION_AMBIENT_DISABLED 0 +#define REFLECTION_AMBIENT_ENVIRONMENT 1 +#define REFLECTION_AMBIENT_COLOR 2 + +struct ReflectionData { + vec3 box_extents; + float index; + vec3 box_offset; + uint mask; + vec4 params; // intensity, 0, interior , boxproject + vec3 ambient; // ambient color + uint ambient_mode; + mat4 local_matrix; // up to here for spot and omni, rest is for directional + // notes: for ambientblend, use distance to edge to blend between already existing global environment +}; + +struct DirectionalLightData { + vec3 direction; + float energy; + vec3 color; + float size; + float specular; + uint mask; + float softshadow_angle; + float soft_shadow_scale; + bool blend_splits; + bool shadow_enabled; + float fade_from; + float fade_to; + uvec3 pad; + float shadow_volumetric_fog_fade; + vec4 shadow_bias; + vec4 shadow_normal_bias; + vec4 shadow_transmittance_bias; + vec4 shadow_z_range; + vec4 shadow_range_begin; + vec4 shadow_split_offsets; + mat4 shadow_matrix1; + mat4 shadow_matrix2; + mat4 shadow_matrix3; + mat4 shadow_matrix4; + vec4 shadow_color1; + vec4 shadow_color2; + vec4 shadow_color3; + vec4 shadow_color4; + vec2 uv_scale1; + vec2 uv_scale2; + vec2 uv_scale3; + vec2 uv_scale4; +}; + +struct DecalData { + mat4 xform; //to decal transform + vec3 inv_extents; + float albedo_mix; + vec4 albedo_rect; + vec4 normal_rect; + vec4 orm_rect; + vec4 emission_rect; + vec4 modulate; + float emission_energy; + uint mask; + float upper_fade; + float lower_fade; + mat3x4 normal_xform; + vec3 normal; + float normal_fade; +}; diff --git a/servers/rendering/rasterizer_rd/shaders/copy.glsl b/servers/rendering/rasterizer_rd/shaders/copy.glsl index eb39c28fa9..e565bd8e3d 100644 --- a/servers/rendering/rasterizer_rd/shaders/copy.glsl +++ b/servers/rendering/rasterizer_rd/shaders/copy.glsl @@ -14,6 +14,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; #define FLAG_FLIP_Y (1 << 5) #define FLAG_FORCE_LUMINANCE (1 << 6) #define FLAG_COPY_ALL_SOURCE (1 << 7) +#define FLAG_HIGH_QUALITY_GLOW (1 << 8) layout(push_constant, binding = 1, std430) uniform Params { ivec4 section; @@ -116,17 +117,42 @@ void main() { vec4 color = vec4(0.0); if (bool(params.flags & FLAG_HORIZONTAL)) { - ivec2 base_pos = (pos + params.section.xy) << 1; + ivec2 base_pos = ((pos + params.section.xy) << 1) + ivec2(1); ivec2 section_begin = params.section.xy << 1; ivec2 section_end = section_begin + (params.section.zw << 1); - GLOW_ADD(ivec2(0, 0), 0.174938); - GLOW_ADD(ivec2(1, 0), 0.165569); - GLOW_ADD(ivec2(2, 0), 0.140367); - GLOW_ADD(ivec2(3, 0), 0.106595); - GLOW_ADD(ivec2(-1, 0), 0.165569); - GLOW_ADD(ivec2(-2, 0), 0.140367); - GLOW_ADD(ivec2(-3, 0), 0.106595); + if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) { + //Sample from two lines to capture single pixel features + GLOW_ADD(ivec2(0, 0), 0.152781); + GLOW_ADD(ivec2(1, 0), 0.144599); + GLOW_ADD(ivec2(2, 0), 0.122589); + GLOW_ADD(ivec2(3, 0), 0.093095); + GLOW_ADD(ivec2(4, 0), 0.063327); + GLOW_ADD(ivec2(-1, 0), 0.144599); + GLOW_ADD(ivec2(-2, 0), 0.122589); + GLOW_ADD(ivec2(-3, 0), 0.093095); + GLOW_ADD(ivec2(-4, 0), 0.063327); + + GLOW_ADD(ivec2(0, 1), 0.152781); + GLOW_ADD(ivec2(1, 1), 0.144599); + GLOW_ADD(ivec2(2, 1), 0.122589); + GLOW_ADD(ivec2(3, 1), 0.093095); + GLOW_ADD(ivec2(4, 1), 0.063327); + GLOW_ADD(ivec2(-1, 1), 0.144599); + GLOW_ADD(ivec2(-2, 1), 0.122589); + GLOW_ADD(ivec2(-3, 1), 0.093095); + GLOW_ADD(ivec2(-4, 1), 0.063327); + color *= 0.5; + } else { + GLOW_ADD(ivec2(0, 0), 0.174938); + GLOW_ADD(ivec2(1, 0), 0.165569); + GLOW_ADD(ivec2(2, 0), 0.140367); + GLOW_ADD(ivec2(3, 0), 0.106595); + GLOW_ADD(ivec2(-1, 0), 0.165569); + GLOW_ADD(ivec2(-2, 0), 0.140367); + GLOW_ADD(ivec2(-3, 0), 0.106595); + } + color *= params.glow_strength; } else { ivec2 base_pos = pos + params.section.xy; diff --git a/servers/rendering/rasterizer_rd/shaders/gi.glsl b/servers/rendering/rasterizer_rd/shaders/gi.glsl index a1939f75ad..8011dadc72 100644 --- a/servers/rendering/rasterizer_rd/shaders/gi.glsl +++ b/servers/rendering/rasterizer_rd/shaders/gi.glsl @@ -80,7 +80,7 @@ struct GIProbeData { float anisotropy_strength; float ambient_occlusion; float ambient_occlusion_size; - uint pad2; + uint mipmaps; }; layout(set = 0, binding = 16, std140) uniform GIProbes { diff --git a/servers/rendering/rasterizer_rd/shaders/particles.glsl b/servers/rendering/rasterizer_rd/shaders/particles.glsl new file mode 100644 index 0000000000..7cdedfcbfe --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/particles.glsl @@ -0,0 +1,262 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#define SAMPLER_NEAREST_CLAMP 0 +#define SAMPLER_LINEAR_CLAMP 1 +#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2 +#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5 +#define SAMPLER_NEAREST_REPEAT 6 +#define SAMPLER_LINEAR_REPEAT 7 +#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8 +#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 + +/* SET 0: GLOBAL DATA */ + +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; + +layout(set = 0, binding = 2, std430) restrict readonly buffer GlobalVariableData { + vec4 data[]; +} +global_variables; + +/* Set 1: FRAME AND PARTICLE DATA */ + +// a frame history is kept for trail deterministic behavior +struct FrameParams { + bool emitting; + float system_phase; + float prev_system_phase; + uint cycle; + + float explosiveness; + float randomness; + float time; + float delta; + + uint random_seed; + uint pad[3]; + + mat4 emission_transform; +}; + +layout(set = 1, binding = 0, std430) restrict buffer FrameHistory { + FrameParams data[]; +} +frame_history; + +struct ParticleData { + mat4 xform; + vec3 velocity; + bool is_active; + vec4 color; + vec4 custom; +}; + +layout(set = 1, binding = 1, std430) restrict buffer Particles { + ParticleData data[]; +} +particles; + +/* SET 2: MATERIAL */ + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = 2, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ +MATERIAL_UNIFORMS + /* clang-format on */ +} material; +#endif + +layout(push_constant, binding = 0, std430) uniform Params { + float lifetime; + bool clear; + uint total_particles; + uint trail_size; + bool use_fractional_delta; + uint pad[3]; +} +params; + +uint hash(uint x) { + x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = (x >> uint(16)) ^ x; + return x; +} + +/* clang-format off */ + +COMPUTE_SHADER_GLOBALS + +/* clang-format on */ + +void main() { + uint particle = gl_GlobalInvocationID.x; + + if (particle >= params.total_particles * params.trail_size) { + return; //discard + } + + uint index = particle / params.trail_size; + uint frame = (particle % params.trail_size); + +#define FRAME frame_history.data[frame] +#define PARTICLE particles.data[particle] + + bool apply_forces = true; + bool apply_velocity = true; + float local_delta = FRAME.delta; + + float mass = 1.0; + + float restart_phase = float(index) / float(params.total_particles); + + if (FRAME.randomness > 0.0) { + uint seed = FRAME.cycle; + if (restart_phase >= FRAME.system_phase) { + seed -= uint(1); + } + seed *= uint(params.total_particles); + seed += uint(index); + float random = float(hash(seed) % uint(65536)) / 65536.0; + restart_phase += FRAME.randomness * random * 1.0 / float(params.total_particles); + } + + restart_phase *= (1.0 - FRAME.explosiveness); + + bool restart = false; + + if (FRAME.system_phase > FRAME.prev_system_phase) { + // restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed + + if (restart_phase >= FRAME.prev_system_phase && restart_phase < FRAME.system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (FRAME.system_phase - restart_phase) * params.lifetime; + } + } + + } else if (FRAME.delta > 0.0) { + if (restart_phase >= FRAME.prev_system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (1.0 - restart_phase + FRAME.system_phase) * params.lifetime; + } + + } else if (restart_phase < FRAME.system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (FRAME.system_phase - restart_phase) * params.lifetime; + } + } + } + + uint current_cycle = FRAME.cycle; + + if (FRAME.system_phase < restart_phase) { + current_cycle -= uint(1); + } + + uint particle_number = current_cycle * uint(params.total_particles) + particle; + + if (restart) { + PARTICLE.is_active = FRAME.emitting; + } + +#ifdef ENABLE_KEEP_DATA + if (params.clear) { +#else + if (params.clear || restart) { +#endif + PARTICLE.color = vec4(1.0); + PARTICLE.custom = vec4(0.0); + PARTICLE.velocity = vec3(0.0); + if (!restart) { + PARTICLE.is_active = false; + } + PARTICLE.xform = mat4( + vec4(1.0, 0.0, 0.0, 0.0), + vec4(0.0, 1.0, 0.0, 0.0), + vec4(0.0, 0.0, 1.0, 0.0), + vec4(0.0, 0.0, 0.0, 1.0)); + } + + if (PARTICLE.is_active) { + /* clang-format off */ + +COMPUTE_SHADER_CODE + + /* clang-format on */ + } + +#if !defined(DISABLE_VELOCITY) + + if (PARTICLE.is_active) { + PARTICLE.xform[3].xyz += PARTICLE.velocity * local_delta; + } +#endif + +#if 0 + if (PARTICLE.is_active) { + //execute shader + + + + + //!defined(DISABLE_FORCE) + + if (false) { + vec3 force = vec3(0.0); + for (int i = 0; i < attractor_count; i++) { + vec3 rel_vec = xform[3].xyz - attractors[i].pos; + float dist = length(rel_vec); + if (attractors[i].radius < dist) + continue; + if (attractors[i].eat_radius > 0.0 && attractors[i].eat_radius > dist) { + out_velocity_active.a = 0.0; + } + + rel_vec = normalize(rel_vec); + + float attenuation = pow(dist / attractors[i].radius, attractors[i].attenuation); + + if (attractors[i].dir == vec3(0.0)) { + //towards center + force += attractors[i].strength * rel_vec * attenuation * mass; + } else { + force += attractors[i].strength * attractors[i].dir * attenuation * mass; + } + } + + out_velocity_active.xyz += force * local_delta; + } + +#if !defined(DISABLE_VELOCITY) + + if (true) { + xform[3].xyz += out_velocity_active.xyz * local_delta; + } +#endif + } else { + xform = mat4(0.0); + } + + + xform = transpose(xform); + + out_velocity_active.a = mix(0.0, 1.0, shader_active); + + out_xform_1 = xform[0]; + out_xform_2 = xform[1]; + out_xform_3 = xform[2]; +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl b/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl new file mode 100644 index 0000000000..6c782b6045 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl @@ -0,0 +1,82 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +struct ParticleData { + mat4 xform; + vec3 velocity; + bool is_active; + vec4 color; + vec4 custom; +}; + +layout(set = 0, binding = 1, std430) restrict readonly buffer Particles { + ParticleData data[]; +} +particles; + +layout(set = 0, binding = 2, std430) restrict writeonly buffer Transforms { + vec4 data[]; +} +instances; + +#ifdef USE_SORT_BUFFER + +layout(set = 1, binding = 0, std430) restrict buffer SortBuffer { + vec2 data[]; +} +sort_buffer; + +#endif // USE_SORT_BUFFER + +layout(push_constant, binding = 0, std430) uniform Params { + vec3 sort_direction; + uint total_particles; +} +params; + +void main() { +#ifdef MODE_FILL_SORT_BUFFER + + uint particle = gl_GlobalInvocationID.x; + if (particle >= params.total_particles) { + return; //discard + } + + sort_buffer.data[particle].x = dot(params.sort_direction, particles.data[particle].xform[3].xyz); + sort_buffer.data[particle].y = float(particle); +#endif + +#ifdef MODE_FILL_INSTANCES + + uint particle = gl_GlobalInvocationID.x; + uint write_offset = gl_GlobalInvocationID.x * (3 + 1 + 1); //xform + color + custom + + if (particle >= params.total_particles) { + return; //discard + } + +#ifdef USE_SORT_BUFFER + particle = uint(sort_buffer.data[particle].y); //use index from sort buffer +#endif + + mat4 txform; + + if (particles.data[particle].is_active) { + txform = transpose(particles.data[particle].xform); + } else { + txform = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); //zero scale, becomes invisible + } + + instances.data[write_offset + 0] = txform[0]; + instances.data[write_offset + 1] = txform[1]; + instances.data[write_offset + 2] = txform[2]; + instances.data[write_offset + 3] = particles.data[particle].color; + instances.data[write_offset + 4] = particles.data[particle].custom; + +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl index 792a1aa05f..5993e68317 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl @@ -1237,7 +1237,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; //reconstruct depth - shadow_z / lights.data[idx].inv_radius; + shadow_z /= lights.data[idx].inv_radius; //distance to light plane float z = dot(spot_dir, -light_rel_vec); transmittance_z = z - shadow_z; @@ -1601,6 +1601,51 @@ void sdfgi_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal #endif //!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) +#ifndef MODE_RENDER_DEPTH + +vec4 volumetric_fog_process(vec2 screen_uv, float z) { + vec3 fog_pos = vec3(screen_uv, z * scene_data.volumetric_fog_inv_length); + if (fog_pos.z < 0.0) { + return vec4(0.0); + } else if (fog_pos.z < 1.0) { + fog_pos.z = pow(fog_pos.z, scene_data.volumetric_fog_detail_spread); + } + + return texture(sampler3D(volumetric_fog_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), fog_pos); +} + +vec4 fog_process(vec3 vertex) { + vec3 fog_color = scene_data.fog_light_color; + + if (scene_data.fog_sun_scatter > 0.001) { + vec4 sun_scatter = vec4(0.0); + float sun_total = 0.0; + vec3 view = normalize(vertex); + + for (uint i = 0; i < scene_data.directional_light_count; i++) { + vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy; + float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0); + fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + } + } + + float fog_amount = 1.0 - exp(vertex.z * scene_data.fog_density); + + if (abs(scene_data.fog_height_density) > 0.001) { + float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; + + float y_dist = scene_data.fog_height - y; + + float vfog_amount = clamp(exp(y_dist * scene_data.fog_height_density), 0.0, 1.0); + + fog_amount = max(vfog_amount, fog_amount); + } + + return vec4(fog_color, fog_amount); +} + +#endif + void main() { #ifdef MODE_DUAL_PARABOLOID @@ -2187,8 +2232,8 @@ FRAGMENT_SHADER_CODE trans_coord /= trans_coord.w; float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.x; - float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.x; + shadow_z *= directional_lights.data[i].shadow_z_range.x; + float z = trans_coord.z * directional_lights.data[i].shadow_z_range.x; transmittance_z = z - shadow_z; } @@ -2219,8 +2264,8 @@ FRAGMENT_SHADER_CODE trans_coord /= trans_coord.w; float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.y; - float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.y; + shadow_z *= directional_lights.data[i].shadow_z_range.y; + float z = trans_coord.z * directional_lights.data[i].shadow_z_range.y; transmittance_z = z - shadow_z; } @@ -2251,8 +2296,8 @@ FRAGMENT_SHADER_CODE trans_coord /= trans_coord.w; float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.z; - float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.z; + shadow_z *= directional_lights.data[i].shadow_z_range.z; + float z = trans_coord.z * directional_lights.data[i].shadow_z_range.z; transmittance_z = z - shadow_z; } @@ -2285,8 +2330,8 @@ FRAGMENT_SHADER_CODE trans_coord /= trans_coord.w; float shadow_z = textureLod(sampler2D(directional_shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), trans_coord.xy, 0.0).r; - shadow_z *= directional_lights.data[i].shadow_transmittance_z_scale.w; - float z = trans_coord.z * directional_lights.data[i].shadow_transmittance_z_scale.w; + shadow_z *= directional_lights.data[i].shadow_z_range.w; + float z = trans_coord.z * directional_lights.data[i].shadow_z_range.w; transmittance_z = z - shadow_z; } @@ -2662,8 +2707,6 @@ FRAGMENT_SHADER_CODE diffuse_light *= 1.0 - metallic; // TODO: avoid all diffuse and ambient light calculations when metallic == 1 up to this point ambient_light *= 1.0 - metallic; - //fog - #ifdef MODE_MULTIPLE_RENDER_TARGETS #ifdef MODE_UNSHADED @@ -2679,16 +2722,37 @@ FRAGMENT_SHADER_CODE specular_buffer = vec4(specular_light, metallic); #endif + if (scene_data.volumetric_fog_enabled) { + vec4 fog = volumetric_fog_process(screen_uv, -vertex.z); + diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); + specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); + } + + if (scene_data.fog_enabled) { + vec4 fog = fog_process(vertex); + diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); + specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); + } + #else //MODE_MULTIPLE_RENDER_TARGETS #ifdef MODE_UNSHADED frag_color = vec4(albedo, alpha); #else frag_color = vec4(emission + ambient_light + diffuse_light + specular_light, alpha); - //frag_color = vec4(1.0);;; - + //frag_color = vec4(1.0); #endif //USE_NO_SHADING + if (scene_data.volumetric_fog_enabled) { + vec4 fog = volumetric_fog_process(screen_uv, -vertex.z); + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + } + + if (scene_data.fog_enabled) { + vec4 fog = fog_process(vertex); + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + } + #endif //MODE_MULTIPLE_RENDER_TARGETS #endif //MODE_RENDER_DEPTH diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl index c4dc7bd675..66bfefbe89 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl @@ -3,6 +3,8 @@ #define MAX_GI_PROBES 8 +#include "cluster_data_inc.glsl" + layout(push_constant, binding = 0, std430) uniform DrawCall { uint instance_index; uint pad; //16 bits minimum size @@ -94,40 +96,18 @@ layout(set = 0, binding = 3, std140) uniform SceneData { ivec3 sdf_size; bool gi_upscale_for_msaa; -#if 0 - vec4 ambient_light_color; - vec4 bg_color; - - vec4 fog_color_enabled; - vec4 fog_sun_color_amount; - - float ambient_energy; - float bg_energy; -#endif - -#if 0 - vec2 shadow_atlas_pixel_size; - vec2 directional_shadow_pixel_size; + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; - float z_far; - - float subsurface_scatter_width; - float ambient_occlusion_affect_light; - float ambient_occlusion_affect_ao_channel; - float opaque_prepass_threshold; - - bool fog_depth_enabled; - float fog_depth_begin; - float fog_depth_end; + bool fog_enabled; float fog_density; - float fog_depth_curve; - bool fog_transmit_enabled; - float fog_transmit_curve; - bool fog_height_enabled; - float fog_height_min; - float fog_height_max; - float fog_height_curve; -#endif + float fog_height; + float fog_height_density; + + vec3 fog_light_color; + float fog_sun_scatter; } scene_data; @@ -163,86 +143,16 @@ layout(set = 0, binding = 4, std430) restrict readonly buffer Instances { } instances; -struct LightData { //this structure needs to be as packed as possible - vec3 position; - float inv_radius; - vec3 direction; - float size; - uint attenuation_energy; //attenuation - uint color_specular; //rgb color, a specular (8 bit unorm) - uint cone_attenuation_angle; // attenuation and angle, (16bit float) - uint shadow_color_enabled; //shadow rgb color, a>0.5 enabled (8bit unorm) - vec4 atlas_rect; // rect in the shadow atlas - mat4 shadow_matrix; - float shadow_bias; - float shadow_normal_bias; - float transmittance_bias; - float soft_shadow_size; // for spot, it's the size in uv coordinates of the light, for omni it's the span angle - float soft_shadow_scale; // scales the shadow kernel for blurrier shadows - uint mask; - uint pad[2]; - vec4 projector_rect; //projector rect in srgb decal atlas -}; - layout(set = 0, binding = 5, std430) restrict readonly buffer Lights { LightData data[]; } lights; -#define REFLECTION_AMBIENT_DISABLED 0 -#define REFLECTION_AMBIENT_ENVIRONMENT 1 -#define REFLECTION_AMBIENT_COLOR 2 - -struct ReflectionData { - vec3 box_extents; - float index; - vec3 box_offset; - uint mask; - vec4 params; // intensity, 0, interior , boxproject - vec3 ambient; // ambient color - uint ambient_mode; - mat4 local_matrix; // up to here for spot and omni, rest is for directional - // notes: for ambientblend, use distance to edge to blend between already existing global environment -}; - layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -struct DirectionalLightData { - vec3 direction; - float energy; - vec3 color; - float size; - float specular; - uint mask; - float softshadow_angle; - float soft_shadow_scale; - bool blend_splits; - bool shadow_enabled; - float fade_from; - float fade_to; - vec4 shadow_bias; - vec4 shadow_normal_bias; - vec4 shadow_transmittance_bias; - vec4 shadow_transmittance_z_scale; - vec4 shadow_range_begin; - vec4 shadow_split_offsets; - mat4 shadow_matrix1; - mat4 shadow_matrix2; - mat4 shadow_matrix3; - mat4 shadow_matrix4; - vec4 shadow_color1; - vec4 shadow_color2; - vec4 shadow_color3; - vec4 shadow_color4; - vec2 uv_scale1; - vec2 uv_scale2; - vec2 uv_scale3; - vec2 uv_scale4; -}; - layout(set = 0, binding = 7, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } @@ -271,31 +181,9 @@ layout(set = 0, binding = 12, std140) restrict readonly buffer LightmapCaptures } lightmap_captures; -#define CLUSTER_COUNTER_SHIFT 20 -#define CLUSTER_POINTER_MASK ((1 << CLUSTER_COUNTER_SHIFT) - 1) -#define CLUSTER_COUNTER_MASK 0xfff - layout(set = 0, binding = 13) uniform texture2D decal_atlas; layout(set = 0, binding = 14) uniform texture2D decal_atlas_srgb; -struct DecalData { - mat4 xform; //to decal transform - vec3 inv_extents; - float albedo_mix; - vec4 albedo_rect; - vec4 normal_rect; - vec4 orm_rect; - vec4 emission_rect; - vec4 modulate; - float emission_energy; - uint mask; - float upper_fade; - float lower_fade; - mat3x4 normal_xform; - vec3 normal; - float normal_fade; -}; - layout(set = 0, binding = 15, std430) restrict readonly buffer Decals { DecalData data[]; } @@ -394,9 +282,7 @@ layout(set = 3, binding = 2) uniform texture2D normal_roughness_buffer; layout(set = 3, binding = 4) uniform texture2D ao_buffer; layout(set = 3, binding = 5) uniform texture2D ambient_buffer; layout(set = 3, binding = 6) uniform texture2D reflection_buffer; - layout(set = 3, binding = 7) uniform texture2DArray sdfgi_lightprobe_texture; - layout(set = 3, binding = 8) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { @@ -412,7 +298,7 @@ struct GIProbeData { float anisotropy_strength; float ambient_occlusion; float ambient_occlusion_size; - uint pad2; + uint mipmaps; }; layout(set = 3, binding = 9, std140) uniform GIProbes { @@ -420,6 +306,8 @@ layout(set = 3, binding = 9, std140) uniform GIProbes { } gi_probes; +layout(set = 3, binding = 10) uniform texture3D volumetric_fog_texture; + #endif /* Set 4 Skeleton & Instancing (Multimesh) */ diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl index e4779aafaf..1ec471d204 100644 --- a/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl @@ -37,6 +37,8 @@ layout(rgba32i, set = 0, binding = 12) uniform restrict iimage2D lightprobe_aver layout(rgba32i, set = 0, binding = 13) uniform restrict iimage2D lightprobe_average_parent_texture; +layout(rgba16f, set = 0, binding = 14) uniform restrict writeonly image2DArray lightprobe_ambient_texture; + layout(set = 1, binding = 0) uniform textureCube sky_irradiance; layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; @@ -68,6 +70,9 @@ layout(push_constant, binding = 0, std430) uniform Params { vec3 sky_color; float y_mult; + + bool store_ambient_texture; + uint pad[3]; } params; @@ -319,6 +324,13 @@ void main() { imageStore(lightprobe_history_texture, prev_pos, ivalue); imageStore(lightprobe_average_texture, average_pos, average); + + if (params.store_ambient_texture && i == 0) { + ivec3 ambient_pos = ivec3(pos, int(params.cascade)); + vec4 ambient_light = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + ambient_light *= 0.88622; // SHL0 + imageStore(lightprobe_ambient_texture, ambient_pos, ambient_light); + } } #endif // MODE PROCESS diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl index d7d19897e3..dd0ca5c506 100644 --- a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl @@ -338,7 +338,7 @@ void main() { continue; //was not initialized yet, ignore } - float q_dist = distance(posf, vec3(p.xyz)); + float q_dist = distance(posf, vec3(q.xyz)); if (p.w == 0 || q_dist < p_dist) { p = q; //just replace because current is unused p_dist = q_dist; diff --git a/servers/rendering/rasterizer_rd/shaders/shadow_reduce.glsl b/servers/rendering/rasterizer_rd/shaders/shadow_reduce.glsl new file mode 100644 index 0000000000..29443ae7db --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/shadow_reduce.glsl @@ -0,0 +1,105 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#define BLOCK_SIZE 8 + +layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; + +#ifdef MODE_REDUCE + +shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; +const uint swizzle_table[BLOCK_SIZE] = uint[](0, 4, 2, 6, 1, 5, 3, 7); +const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3); + +#endif + +layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_depth; +layout(r32f, set = 0, binding = 1) uniform restrict writeonly image2D dst_depth; + +layout(push_constant, binding = 1, std430) uniform Params { + ivec2 source_size; + ivec2 source_offset; + uint min_size; + uint gaussian_kernel_version; + ivec2 filter_dir; +} +params; + +void main() { +#ifdef MODE_REDUCE + + uvec2 pos = gl_LocalInvocationID.xy; + + ivec2 image_offset = params.source_offset; + ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy); + uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x]; + tmp_data[dst_t] = imageLoad(source_depth, min(image_pos, params.source_size - ivec2(1))).r; + ivec2 image_size = params.source_size; + + uint t = pos.y * BLOCK_SIZE + pos.x; + + //neighbours + uint size = BLOCK_SIZE; + + do { + groupMemoryBarrier(); + barrier(); + + size >>= 1; + image_size >>= 1; + image_offset >>= 1; + + if (all(lessThan(pos, uvec2(size)))) { + uint nx = t + size; + uint ny = t + (BLOCK_SIZE * size); + uint nxy = ny + size; + + tmp_data[t] += tmp_data[nx]; + tmp_data[t] += tmp_data[ny]; + tmp_data[t] += tmp_data[nxy]; + tmp_data[t] /= 4.0; + } + + } while (size > params.min_size); + + if (all(lessThan(pos, uvec2(size)))) { + image_pos = ivec2(unswizzle_table[size + pos.x], unswizzle_table[size + pos.y]); + image_pos += image_offset + ivec2(gl_WorkGroupID.xy) * int(size); + + image_size = max(ivec2(1), image_size); //in case image size became 0 + + if (all(lessThan(image_pos, uvec2(image_size)))) { + imageStore(dst_depth, image_pos, vec4(tmp_data[t])); + } + } +#endif + +#ifdef MODE_FILTER + + ivec2 image_pos = params.source_offset + ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(image_pos, params.source_size))) { + return; + } + + ivec2 clamp_min = ivec2(params.source_offset); + ivec2 clamp_max = ivec2(params.source_size) - 1; + + //gaussian kernel, size 9, sigma 4 + const int kernel_size = 9; + const float gaussian_kernel[kernel_size * 3] = float[]( + 0.000229, 0.005977, 0.060598, 0.241732, 0.382928, 0.241732, 0.060598, 0.005977, 0.000229, + 0.028532, 0.067234, 0.124009, 0.179044, 0.20236, 0.179044, 0.124009, 0.067234, 0.028532, + 0.081812, 0.101701, 0.118804, 0.130417, 0.134535, 0.130417, 0.118804, 0.101701, 0.081812); + float accum = 0.0; + for (int i = 0; i < kernel_size; i++) { + ivec2 ofs = clamp(image_pos + params.filter_dir * (i - kernel_size / 2), clamp_min, clamp_max); + accum += imageLoad(source_depth, ofs).r * gaussian_kernel[params.gaussian_kernel_version + i]; + } + + imageStore(dst_depth, image_pos, vec4(accum)); + +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/sky.glsl b/servers/rendering/rasterizer_rd/shaders/sky.glsl index 9c59be6841..7b6de6a555 100644 --- a/servers/rendering/rasterizer_rd/shaders/sky.glsl +++ b/servers/rendering/rasterizer_rd/shaders/sky.glsl @@ -58,6 +58,35 @@ layout(set = 0, binding = 1, std430) restrict readonly buffer GlobalVariableData } global_variables; +layout(set = 0, binding = 2, std140) uniform SceneData { + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; + + vec3 fog_light_color; + float fog_sun_scatter; + + bool fog_enabled; + float fog_density; + + float z_far; + uint directional_light_count; +} +scene_data; + +struct DirectionalLightData { + vec4 direction_energy; + vec4 color_size; + bool enabled; +}; + +layout(set = 0, binding = 3, std140) uniform DirectionalLights { + DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; +} + +directional_lights; + #ifdef USE_MATERIAL_UNIFORMS layout(set = 1, binding = 0, std140) uniform MaterialUniforms{ /* clang-format off */ @@ -77,6 +106,8 @@ layout(set = 2, binding = 1) uniform texture2D half_res; layout(set = 2, binding = 2) uniform texture2D quarter_res; #endif +layout(set = 3, binding = 0) uniform texture3D volumetric_fog_texture; + #ifdef USE_CUBEMAP_PASS #define AT_CUBEMAP_PASS true #else @@ -95,18 +126,6 @@ layout(set = 2, binding = 2) uniform texture2D quarter_res; #define AT_QUARTER_RES_PASS false #endif -struct DirectionalLightData { - vec4 direction_energy; - vec4 color_size; - bool enabled; -}; - -layout(set = 3, binding = 0, std140) uniform DirectionalLights { - DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; -} - -directional_lights; - /* clang-format off */ FRAGMENT_SHADER_GLOBALS @@ -115,6 +134,30 @@ FRAGMENT_SHADER_GLOBALS layout(location = 0) out vec4 frag_color; +vec4 volumetric_fog_process(vec2 screen_uv) { + vec3 fog_pos = vec3(screen_uv, 1.0); + + return texture(sampler3D(volumetric_fog_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), fog_pos); +} + +vec4 fog_process(vec3 view) { + vec3 fog_color = scene_data.fog_light_color; + + if (scene_data.fog_sun_scatter > 0.001) { + vec4 sun_scatter = vec4(0.0); + float sun_total = 0.0; + for (uint i = 0; i < scene_data.directional_light_count; i++) { + vec3 light_color = directional_lights.data[i].color_size.xyz * directional_lights.data[i].direction_energy.w; + float light_amount = pow(max(dot(view, directional_lights.data[i].direction_energy.xyz), 0.0), 8.0); + fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + } + } + + float fog_amount = clamp(1.0 - exp(-scene_data.z_far * scene_data.fog_density), 0.0, 1.0); + + return vec4(fog_color, fog_amount); +} + void main() { vec3 cube_normal; cube_normal.z = -1.0; @@ -178,6 +221,20 @@ FRAGMENT_SHADER_CODE frag_color.rgb = color * params.position_multiplier.w; frag_color.a = alpha; +#if !defined(DISABLE_FOG) && !defined(USE_CUBEMAP_PASS) + + if (scene_data.volumetric_fog_enabled) { + vec4 fog = volumetric_fog_process(uv); + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + } + + if (scene_data.fog_enabled) { + vec4 fog = fog_process(cube_normal); + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + } + +#endif // DISABLE_FOG + // Blending is disabled for Sky, so alpha doesn't blend // alpha is used for subsurface scattering so make sure it doesn't get applied to Sky if (!AT_CUBEMAP_PASS && !AT_HALF_RES_PASS && !AT_QUARTER_RES_PASS) { diff --git a/servers/rendering/rasterizer_rd/shaders/sort.glsl b/servers/rendering/rasterizer_rd/shaders/sort.glsl new file mode 100644 index 0000000000..e5ebb9c64b --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sort.glsl @@ -0,0 +1,203 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +// Original version here: +// https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11/blob/master/gpuparticles11/src/Shaders + +// +// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#define SORT_SIZE 512 +#define NUM_THREADS (SORT_SIZE / 2) +#define INVERSION (16 * 2 + 8 * 3) +#define ITERATIONS 1 + +layout(local_size_x = NUM_THREADS, local_size_y = 1, local_size_z = 1) in; + +#ifndef MODE_SORT_STEP + +shared vec2 g_LDS[SORT_SIZE]; + +#endif + +layout(set = 1, binding = 0, std430) restrict buffer SortBuffer { + vec2 data[]; +} +sort_buffer; + +layout(push_constant, binding = 0, std430) uniform Params { + uint total_elements; + uint pad[3]; + ivec4 job_params; +} +params; + +void main() { +#ifdef MODE_SORT_BLOCK + + uvec3 Gid = gl_WorkGroupID; + uvec3 DTid = gl_GlobalInvocationID; + uvec3 GTid = gl_LocalInvocationID; + uint GI = gl_LocalInvocationIndex; + + int GlobalBaseIndex = int((Gid.x * SORT_SIZE) + GTid.x); + int LocalBaseIndex = int(GI); + int numElementsInThreadGroup = int(min(SORT_SIZE, params.total_elements - (Gid.x * SORT_SIZE))); + + // Load shared data + + int i; + for (i = 0; i < 2 * ITERATIONS; ++i) { + if (GI + i * NUM_THREADS < numElementsInThreadGroup) + g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS]; + } + + groupMemoryBarrier(); + barrier(); + + // Bitonic sort + for (int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2) { + for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) { + for (i = 0; i < ITERATIONS; ++i) { + int tmp_index = int(GI + NUM_THREADS * i); + int index_low = tmp_index & (nMergeSubSize - 1); + int index_high = 2 * (tmp_index - index_low); + int index = index_high + index_low; + + int nSwapElem = nMergeSubSize == nMergeSize >> 1 ? index_high + (2 * nMergeSubSize - 1) - index_low : index_high + nMergeSubSize + index_low; + if (nSwapElem < numElementsInThreadGroup) { + vec2 a = g_LDS[index]; + vec2 b = g_LDS[nSwapElem]; + + if (a.x > b.x) { + g_LDS[index] = b; + g_LDS[nSwapElem] = a; + } + } + groupMemoryBarrier(); + barrier(); + } + } + } + + // Store shared data + for (i = 0; i < 2 * ITERATIONS; ++i) { + if (GI + i * NUM_THREADS < numElementsInThreadGroup) { + sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS]; + } + } + +#endif + +#ifdef MODE_SORT_STEP + + uvec3 Gid = gl_WorkGroupID; + uvec3 GTid = gl_LocalInvocationID; + + ivec4 tgp; + + tgp.x = int(Gid.x) * 256; + tgp.y = 0; + tgp.z = int(params.total_elements); + tgp.w = min(512, max(0, tgp.z - int(Gid.x) * 512)); + + uint localID = int(tgp.x) + GTid.x; // calculate threadID within this sortable-array + + uint index_low = localID & (params.job_params.x - 1); + uint index_high = 2 * (localID - index_low); + + uint index = tgp.y + index_high + index_low; + uint nSwapElem = tgp.y + index_high + params.job_params.y + params.job_params.z * index_low; + + if (nSwapElem < tgp.y + tgp.z) { + vec2 a = sort_buffer.data[index]; + vec2 b = sort_buffer.data[nSwapElem]; + + if (a.x > b.x) { + sort_buffer.data[index] = b; + sort_buffer.data[nSwapElem] = a; + } + } + +#endif + +#ifdef MODE_SORT_INNER + + uvec3 Gid = gl_WorkGroupID; + uvec3 DTid = gl_GlobalInvocationID; + uvec3 GTid = gl_LocalInvocationID; + uint GI = gl_LocalInvocationIndex; + + ivec4 tgp; + + tgp.x = int(Gid.x * 256); + tgp.y = 0; + tgp.z = int(params.total_elements.x); + tgp.w = int(min(512, max(0, params.total_elements - Gid.x * 512))); + + int GlobalBaseIndex = int(tgp.y + tgp.x * 2 + GTid.x); + int LocalBaseIndex = int(GI); + int i; + + // Load shared data + for (i = 0; i < 2; ++i) { + if (GI + i * NUM_THREADS < tgp.w) + g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS]; + } + + groupMemoryBarrier(); + barrier(); + + // sort threadgroup shared memory + for (int nMergeSubSize = SORT_SIZE >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) { + int tmp_index = int(GI); + int index_low = tmp_index & (nMergeSubSize - 1); + int index_high = 2 * (tmp_index - index_low); + int index = index_high + index_low; + + int nSwapElem = index_high + nMergeSubSize + index_low; + + if (nSwapElem < tgp.w) { + vec2 a = g_LDS[index]; + vec2 b = g_LDS[nSwapElem]; + + if (a.x > b.x) { + g_LDS[index] = b; + g_LDS[nSwapElem] = a; + } + } + groupMemoryBarrier(); + barrier(); + } + + // Store shared data + for (i = 0; i < 2; ++i) { + if (GI + i * NUM_THREADS < tgp.w) { + sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS]; + } + } + +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/volumetric_fog.glsl b/servers/rendering/rasterizer_rd/shaders/volumetric_fog.glsl new file mode 100644 index 0000000000..cb19fb0b69 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/volumetric_fog.glsl @@ -0,0 +1,530 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +#if defined(MODE_FOG) || defined(MODE_FILTER) + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#endif + +#if defined(MODE_DENSITY) + +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#endif + +#include "cluster_data_inc.glsl" + +#define M_PI 3.14159265359 + +layout(set = 0, binding = 1) uniform texture2D shadow_atlas; +layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas; + +layout(set = 0, binding = 3, std430) restrict readonly buffer Lights { + LightData data[]; +} +lights; + +layout(set = 0, binding = 4, std140) uniform DirectionalLights { + DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; +} +directional_lights; + +layout(set = 0, binding = 5) uniform utexture3D cluster_texture; + +layout(set = 0, binding = 6, std430) restrict readonly buffer ClusterData { + uint indices[]; +} +cluster_data; + +layout(set = 0, binding = 7) uniform sampler linear_sampler; + +#ifdef MODE_DENSITY +layout(rgba16f, set = 0, binding = 8) uniform restrict writeonly image3D density_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict readonly image3D fog_map; //unused +#endif + +#ifdef MODE_FOG +layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D density_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D fog_map; +#endif + +#ifdef MODE_FILTER +layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D source_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D dest_map; +#endif + +layout(set = 0, binding = 10) uniform sampler shadow_sampler; + +#define MAX_GI_PROBES 8 + +struct GIProbeData { + mat4 xform; + vec3 bounds; + float dynamic_range; + + float bias; + float normal_bias; + bool blend_ambient; + uint texture_slot; + + float anisotropy_strength; + float ambient_occlusion; + float ambient_occlusion_size; + uint mipmaps; +}; + +layout(set = 0, binding = 11, std140) uniform GIProbes { + GIProbeData data[MAX_GI_PROBES]; +} +gi_probes; + +layout(set = 0, binding = 12) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; + +layout(set = 0, binding = 13) uniform sampler linear_sampler_with_mipmaps; + +#ifdef ENABLE_SDFGI + +// SDFGI Integration on set 1 +#define SDFGI_MAX_CASCADES 8 + +struct SDFGIProbeCascadeData { + vec3 position; + float to_probe; + ivec3 probe_world_offset; + float to_cell; // 1/bounds * grid_size +}; + +layout(set = 1, binding = 0, std140) uniform SDFGI { + vec3 grid_size; + uint max_cascades; + + bool use_occlusion; + int probe_axis_size; + float probe_to_uvw; + float normal_bias; + + vec3 lightprobe_tex_pixel_size; + float energy; + + vec3 lightprobe_uv_offset; + float y_mult; + + vec3 occlusion_clamp; + uint pad3; + + vec3 occlusion_renormalize; + uint pad4; + + vec3 cascade_probe_size; + uint pad5; + + SDFGIProbeCascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; + +layout(set = 1, binding = 1) uniform texture2DArray sdfgi_ambient_texture; + +layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture; + +#endif //SDFGI + +layout(push_constant, binding = 0, std430) uniform Params { + vec2 fog_frustum_size_begin; + vec2 fog_frustum_size_end; + + float fog_frustum_end; + float z_near; + float z_far; + int filter_axis; + + ivec3 fog_volume_size; + uint directional_light_count; + + vec3 light_color; + float base_density; + + float detail_spread; + float gi_inject; + uint max_gi_probes; + uint pad; + + mat3x4 cam_rotation; +} +params; + +float get_depth_at_pos(float cell_depth_size, int z) { + float d = float(z) * cell_depth_size + cell_depth_size * 0.5; //center of voxels + d = pow(d, params.detail_spread); + return params.fog_frustum_end * d; +} + +vec3 hash3f(uvec3 x) { + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + return vec3(x & 0xFFFFF) / vec3(float(0xFFFFF)); +} + +void main() { + vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); + +#ifdef MODE_DENSITY + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + if (any(greaterThanEqual(pos, params.fog_volume_size))) { + return; //do not compute + } + + vec3 posf = vec3(pos); + + //posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0; + + vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); + + vec3 view_pos; + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + + vec3 total_light = params.light_color; + + float total_density = params.base_density; + float cell_depth_size = abs(view_pos.z - get_depth_at_pos(fog_cell_size.z, pos.z + 1)); + //compute directional lights + + for (uint i = 0; i < params.directional_light_count; i++) { + vec3 shadow_attenuation = vec3(1.0); + + if (directional_lights.data[i].shadow_enabled) { + float depth_z = -view_pos.z; + + vec4 pssm_coord; + vec3 shadow_color = directional_lights.data[i].shadow_color1.rgb; + vec3 light_dir = directional_lights.data[i].direction; + vec4 v = vec4(view_pos, 1.0); + float z_range; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.x; + + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.y; + + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.z; + + } else { + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.w; + } + + float depth = texture(sampler2D(directional_shadow_atlas, linear_sampler), pssm_coord.xy).r; + float shadow = exp(min(0.0, (depth - pssm_coord.z)) * z_range * directional_lights.data[i].shadow_volumetric_fog_fade); + + /* + //float shadow = textureProj(sampler2DShadow(directional_shadow_atlas,shadow_sampler),pssm_coord); + float shadow = 0.0; + for(float xi=-1;xi<=1;xi++) { + for(float yi=-1;yi<=1;yi++) { + vec2 ofs = vec2(xi,yi) * 1.5 * params.directional_shadow_pixel_size; + shadow += textureProj(sampler2DShadow(directional_shadow_atlas,shadow_sampler),pssm_coord + vec4(ofs,0.0,0.0)); + } + + } + + shadow /= 3.0 * 3.0; + +*/ + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, view_pos.z)); //done with negative values for performance + + shadow_attenuation = mix(shadow_color, vec3(1.0), shadow); + } + + total_light += shadow_attenuation * directional_lights.data[i].color * directional_lights.data[i].energy / M_PI; + } + + //compute lights from cluster + + vec3 cluster_pos; + cluster_pos.xy = fog_unit_pos.xy; + cluster_pos.z = clamp((abs(view_pos.z) - params.z_near) / (params.z_far - params.z_near), 0.0, 1.0); + + uvec4 cluster_cell = texture(usampler3D(cluster_texture, linear_sampler), cluster_pos); + + uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT; + uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK; + + for (uint i = 0; i < omni_light_count; i++) { + uint light_index = cluster_data.indices[omni_light_pointer + i]; + + vec3 light_pos = lights.data[i].position; + float d = distance(lights.data[i].position, view_pos) * lights.data[i].inv_radius; + vec3 shadow_attenuation = vec3(1.0); + + if (d < 1.0) { + vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); + vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular); + + float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x); + + vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; + + vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled); + + if (shadow_color_enabled.a > 0.5) { + //has shadow + vec4 v = vec4(view_pos, 1.0); + + vec4 splane = (lights.data[i].shadow_matrix * v); + float shadow_len = length(splane.xyz); //need to remember shadow len from here + + splane.xyz = normalize(splane.xyz); + vec4 clamp_rect = lights.data[i].atlas_rect; + + if (splane.z >= 0.0) { + splane.z += 1.0; + + clamp_rect.y += clamp_rect.w; + + } else { + splane.z = 1.0 - splane.z; + } + + splane.xy /= splane.z; + + splane.xy = splane.xy * 0.5 + 0.5; + splane.z = shadow_len * lights.data[i].inv_radius; + splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; + splane.w = 1.0; //needed? i think it should be 1 already + + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); + + shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); + } + total_light += light * attenuation * shadow_attenuation; + } + } + + uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT; + uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK; + + for (uint i = 0; i < spot_light_count; i++) { + uint light_index = cluster_data.indices[spot_light_pointer + i]; + + vec3 light_pos = lights.data[i].position; + vec3 light_rel_vec = lights.data[i].position - view_pos; + float d = length(light_rel_vec) * lights.data[i].inv_radius; + vec3 shadow_attenuation = vec3(1.0); + + if (d < 1.0) { + vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); + vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular); + + float attenuation = pow(max(1.0 - d, 0.0), attenuation_energy.x); + + vec3 spot_dir = lights.data[i].direction; + vec2 spot_att_angle = unpackHalf2x16(lights.data[i].cone_attenuation_angle); + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y)); + attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x); + + vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; + + vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled); + + if (shadow_color_enabled.a > 0.5) { + //has shadow + vec4 v = vec4(view_pos, 1.0); + + vec4 splane = (lights.data[i].shadow_matrix * v); + splane /= splane.w; + + float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; + float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade); + + shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); + } + + total_light += light * attenuation * shadow_attenuation; + } + } + + vec3 world_pos = mat3(params.cam_rotation) * view_pos; + + for (uint i = 0; i < params.max_gi_probes; i++) { + vec3 position = (gi_probes.data[i].xform * vec4(world_pos, 1.0)).xyz; + + //this causes corrupted pixels, i have no idea why.. + if (all(bvec2(all(greaterThanEqual(position, vec3(0.0))), all(lessThan(position, gi_probes.data[i].bounds))))) { + position /= gi_probes.data[i].bounds; + + vec4 light = vec4(0.0); + for (uint j = 0; j < gi_probes.data[i].mipmaps; j++) { + vec4 slight = textureLod(sampler3D(gi_probe_textures[i], linear_sampler_with_mipmaps), position, float(j)); + float a = (1.0 - light.a); + light += a * slight; + } + + light.rgb *= gi_probes.data[i].dynamic_range * params.gi_inject; + + total_light += light.rgb; + } + } + + //sdfgi +#ifdef ENABLE_SDFGI + + { + float blend = -1.0; + vec3 ambient_total = vec3(0.0); + + for (uint i = 0; i < sdfgi.max_cascades; i++) { + vec3 cascade_pos = (world_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + + if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { + continue; //skip cascade + } + + vec3 base_pos = floor(cascade_pos); + ivec3 probe_base_pos = ivec3(base_pos); + + vec4 ambient_accum = vec4(0.0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(i)); + tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = cascade_pos - probe_pos; + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + float weight = trilinear.x * trilinear.y * trilinear.z; + + // Compute lightprobe occlusion + + if (sdfgi.use_occlusion) { + ivec3 occ_indexv = abs((sdfgi.cascades[i].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; + occ_pos.z += float(i); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + + occ_pos *= sdfgi.occlusion_renormalize; + float occlusion = dot(textureLod(sampler3D(sdfgi_occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= max(occlusion, 0.01); + } + + // Compute ambient texture position + + ivec3 uvw = tex_pos; + uvw.xy += offset.xy; + uvw.x += offset.z * sdfgi.probe_axis_size; + + vec3 ambient = texelFetch(sampler2DArray(sdfgi_ambient_texture, linear_sampler), uvw, 0).rgb; + + ambient_accum.rgb += ambient * weight; + ambient_accum.a += weight; + } + + if (ambient_accum.a > 0) { + ambient_accum.rgb /= ambient_accum.a; + } + ambient_total = ambient_accum.rgb; + break; + } + + total_light += ambient_total * params.gi_inject; + } + +#endif + + imageStore(density_map, pos, vec4(total_light, total_density)); +#endif + +#ifdef MODE_FOG + + ivec3 pos = ivec3(gl_GlobalInvocationID.xy, 0); + + if (any(greaterThanEqual(pos, params.fog_volume_size))) { + return; //do not compute + } + + vec4 fog_accum = vec4(0.0); + float prev_z = 0.0; + + float t = 1.0; + + for (int i = 0; i < params.fog_volume_size.z; i++) { + //compute fog position + ivec3 fog_pos = pos + ivec3(0, 0, i); + //get fog value + vec4 fog = imageLoad(density_map, fog_pos); + + //get depth at cell pos + float z = get_depth_at_pos(fog_cell_size.z, i); + //get distance from previos pos + float d = abs(prev_z - z); + //compute exinction based on beer's + float extinction = t * exp(-d * fog.a); + //compute alpha based on different of extinctions + float alpha = t - extinction; + //update extinction + t = extinction; + + fog_accum += vec4(fog.rgb * alpha, alpha); + prev_z = z; + + vec4 fog_value; + + if (fog_accum.a > 0.0) { + fog_value = vec4(fog_accum.rgb / fog_accum.a, 1.0 - t); + } else { + fog_value = vec4(0.0); + } + + imageStore(fog_map, fog_pos, fog_value); + } + +#endif + +#ifdef MODE_FILTER + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + const float gauss[7] = float[](0.071303, 0.131514, 0.189879, 0.214607, 0.189879, 0.131514, 0.071303); + + const ivec3 filter_dir[3] = ivec3[](ivec3(1, 0, 0), ivec3(0, 1, 0), ivec3(0, 0, 1)); + ivec3 offset = filter_dir[params.filter_axis]; + + vec4 accum = vec4(0.0); + for (int i = -3; i <= 3; i++) { + accum += imageLoad(source_map, clamp(pos + offset * i, ivec3(0), params.fog_volume_size - ivec3(1))) * gauss[i + 3]; + } + + imageStore(dest_map, pos, accum); + +#endif +} |