diff options
Diffstat (limited to 'servers/rendering/renderer_rd/shaders')
4 files changed, 250 insertions, 0 deletions
diff --git a/servers/rendering/renderer_rd/shaders/fsr_upscale.glsl b/servers/rendering/renderer_rd/shaders/fsr_upscale.glsl new file mode 100644 index 0000000000..4e2ba84033 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/fsr_upscale.glsl @@ -0,0 +1,173 @@ +/*************************************************************************/ +/* fsr_upscale.glsl */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#[compute] + +#version 450 + +#VERSION_DEFINES + +#define A_GPU +#define A_GLSL + +#ifdef MODE_FSR_UPSCALE_NORMAL + +#define A_HALF + +#endif + +#include "thirdparty/amd-fsr/ffx_a.h" + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D fsr_image; +layout(set = 0, binding = 0) uniform sampler2D source_image; + +#define FSR_UPSCALE_PASS_TYPE_EASU 0 +#define FSR_UPSCALE_PASS_TYPE_RCAS 1 + +layout(push_constant, binding = 1, std430) uniform Params { + float resolution_width; + float resolution_height; + float upscaled_width; + float upscaled_height; + float sharpness; + int pass; +} +params; + +AU4 Const0, Const1, Const2, Const3; + +#ifdef MODE_FSR_UPSCALE_FALLBACK + +#define FSR_EASU_F +AF4 FsrEasuRF(AF2 p) { + AF4 res = textureGather(source_image, p, 0); + return res; +} +AF4 FsrEasuGF(AF2 p) { + AF4 res = textureGather(source_image, p, 1); + return res; +} +AF4 FsrEasuBF(AF2 p) { + AF4 res = textureGather(source_image, p, 2); + return res; +} + +#define FSR_RCAS_F +AF4 FsrRcasLoadF(ASU2 p) { + return AF4(texelFetch(source_image, ASU2(p), 0)); +} +void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + +#else + +#define FSR_EASU_H +AH4 FsrEasuRH(AF2 p) { + AH4 res = AH4(textureGather(source_image, p, 0)); + return res; +} +AH4 FsrEasuGH(AF2 p) { + AH4 res = AH4(textureGather(source_image, p, 1)); + return res; +} +AH4 FsrEasuBH(AF2 p) { + AH4 res = AH4(textureGather(source_image, p, 2)); + return res; +} + +#define FSR_RCAS_H +AH4 FsrRcasLoadH(ASW2 p) { + return AH4(texelFetch(source_image, ASU2(p), 0)); +} +void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) {} + +#endif + +#include "thirdparty/amd-fsr/ffx_fsr1.h" + +void fsr_easu_pass(AU2 pos) { +#ifdef MODE_FSR_UPSCALE_NORMAL + + AH3 Gamma2Color = AH3(0, 0, 0); + FsrEasuH(Gamma2Color, pos, Const0, Const1, Const2, Const3); + imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1)); + +#else + + AF3 Gamma2Color = AF3(0, 0, 0); + FsrEasuF(Gamma2Color, pos, Const0, Const1, Const2, Const3); + imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1)); + +#endif +} + +void fsr_rcas_pass(AU2 pos) { +#ifdef MODE_FSR_UPSCALE_NORMAL + + AH3 Gamma2Color = AH3(0, 0, 0); + FsrRcasH(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0); + imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1)); + +#else + + AF3 Gamma2Color = AF3(0, 0, 0); + FsrRcasF(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0); + imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1)); + +#endif +} + +void fsr_pass(AU2 pos) { + if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) { + fsr_easu_pass(pos); + } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) { + fsr_rcas_pass(pos); + } +} + +void main() { + // Clang does not like unused functions. If ffx_a.h is included in the binary, clang will throw a fit and not compile so we must configure FSR in this shader + if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) { + FsrEasuCon(Const0, Const1, Const2, Const3, params.resolution_width, params.resolution_height, params.resolution_width, params.resolution_height, params.upscaled_width, params.upscaled_height); + } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) { + FsrRcasCon(Const0, params.sharpness); + } + + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); + + fsr_pass(gxy); + gxy.x += 8u; + fsr_pass(gxy); + gxy.y += 8u; + fsr_pass(gxy); + gxy.x -= 8u; + fsr_pass(gxy); +} diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl index 8e33610ae2..e4628b2d5a 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl @@ -1757,7 +1757,11 @@ void main() { } } +#ifdef MOLTENVK_USED + imageStore(geom_facing_grid, grid_pos, uvec4(imageLoad(geom_facing_grid, grid_pos).r | facing_bits)); //store facing bits +#else imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits +#endif if (length(emission) > 0.001) { float lumas[6]; diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl index afc5d68776..181d3b272f 100644 --- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl +++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl @@ -47,7 +47,13 @@ layout(push_constant, binding = 0, std430) uniform Params { } params; +#ifdef MOLTENVK_USED +layout(set = 1, binding = 1) volatile buffer emissive_only_map_buffer { + uint emissive_only_map[]; +}; +#else layout(r32ui, set = 1, binding = 1) uniform volatile uimage3D emissive_only_map; +#endif layout(set = 1, binding = 2, std140) uniform SceneParams { vec2 fog_frustum_size_begin; @@ -71,8 +77,17 @@ layout(set = 1, binding = 2, std140) uniform SceneParams { } scene_params; +#ifdef MOLTENVK_USED +layout(set = 1, binding = 3) volatile buffer density_only_map_buffer { + uint density_only_map[]; +}; +layout(set = 1, binding = 4) volatile buffer light_only_map_buffer { + uint light_only_map[]; +}; +#else layout(r32ui, set = 1, binding = 3) uniform volatile uimage3D density_only_map; layout(r32ui, set = 1, binding = 4) uniform volatile uimage3D light_only_map; +#endif #ifdef MATERIAL_UNIFORMS_USED layout(set = 2, binding = 0, std140) uniform MaterialUniforms{ @@ -115,6 +130,9 @@ void main() { if (any(greaterThanEqual(pos, scene_params.fog_volume_size))) { return; //do not compute } +#ifdef MOLTENVK_USED + uint lpos = pos.z * scene_params.fog_volume_size.x * scene_params.fog_volume_size.y + pos.y * scene_params.fog_volume_size.x + pos.x; +#endif vec3 posf = vec3(pos); @@ -197,7 +215,11 @@ void main() { density *= cull_mask; if (abs(density) > 0.001) { int final_density = int(density * DENSITY_SCALE); +#ifdef MOLTENVK_USED + atomicAdd(density_only_map[lpos], uint(final_density)); +#else imageAtomicAdd(density_only_map, pos, uint(final_density)); +#endif #ifdef EMISSION_USED { @@ -207,7 +229,11 @@ void main() { uvec3 emission_u = uvec3(emission.r * 511.0, emission.g * 511.0, emission.b * 255.0); // R and G have 11 bits each and B has 10. Then pack them into a 32 bit uint uint final_emission = emission_u.r << 21 | emission_u.g << 10 | emission_u.b; +#ifdef MOLTENVK_USED + uint prev_emission = atomicAdd(emissive_only_map[lpos], final_emission); +#else uint prev_emission = imageAtomicAdd(emissive_only_map, pos, final_emission); +#endif // Adding can lead to colors overflowing, so validate uvec3 prev_emission_u = uvec3(prev_emission >> 21, (prev_emission << 11) >> 21, prev_emission % 1024); @@ -219,7 +245,11 @@ void main() { if (any(overflowing)) { uvec3 overflow_factor = mix(uvec3(0), uvec3(2047 << 21, 2047 << 10, 1023), overflowing); uint force_max = overflow_factor.r | overflow_factor.g | overflow_factor.b; +#ifdef MOLTENVK_USED + atomicOr(emissive_only_map[lpos], force_max); +#else imageAtomicOr(emissive_only_map, pos, force_max); +#endif } } #endif @@ -230,7 +260,11 @@ void main() { uvec3 scattering_u = uvec3(scattering.r * 2047.0, scattering.g * 2047.0, scattering.b * 1023.0); // R and G have 11 bits each and B has 10. Then pack them into a 32 bit uint uint final_scattering = scattering_u.r << 21 | scattering_u.g << 10 | scattering_u.b; +#ifdef MOLTENVK_USED + uint prev_scattering = atomicAdd(light_only_map[lpos], final_scattering); +#else uint prev_scattering = imageAtomicAdd(light_only_map, pos, final_scattering); +#endif // Adding can lead to colors overflowing, so validate uvec3 prev_scattering_u = uvec3(prev_scattering >> 21, (prev_scattering << 11) >> 21, prev_scattering % 1024); @@ -242,7 +276,11 @@ void main() { if (any(overflowing)) { uvec3 overflow_factor = mix(uvec3(0), uvec3(2047 << 21, 2047 << 10, 1023), overflowing); uint force_max = overflow_factor.r | overflow_factor.g | overflow_factor.b; +#ifdef MOLTENVK_USED + atomicOr(light_only_map[lpos], force_max); +#else imageAtomicOr(light_only_map, pos, force_max); +#endif } } #endif // ALBEDO_USED diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog_process.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog_process.glsl index 3d6fbb5653..747f88960c 100644 --- a/servers/rendering/renderer_rd/shaders/volumetric_fog_process.glsl +++ b/servers/rendering/renderer_rd/shaders/volumetric_fog_process.glsl @@ -190,9 +190,22 @@ params; #ifndef MODE_COPY layout(set = 0, binding = 15) uniform texture3D prev_density_texture; +#ifdef MOLTENVK_USED +layout(set = 0, binding = 16) buffer density_only_map_buffer { + uint density_only_map[]; +}; +layout(set = 0, binding = 17) buffer light_only_map_buffer { + uint light_only_map[]; +}; +layout(set = 0, binding = 18) buffer emissive_only_map_buffer { + uint emissive_only_map[]; +}; +#else layout(r32ui, set = 0, binding = 16) uniform uimage3D density_only_map; layout(r32ui, set = 0, binding = 17) uniform uimage3D light_only_map; layout(r32ui, set = 0, binding = 18) uniform uimage3D emissive_only_map; +#endif + #ifdef USE_RADIANCE_CUBEMAP_ARRAY layout(set = 0, binding = 19) uniform textureCubeArray sky_texture; #else @@ -272,6 +285,9 @@ void main() { if (any(greaterThanEqual(pos, params.fog_volume_size))) { return; //do not compute } +#ifdef MOLTENVK_USED + uint lpos = pos.z * params.fog_volume_size.x * params.fog_volume_size.y + pos.y * params.fog_volume_size.x + pos.x; +#endif vec3 posf = vec3(pos); @@ -335,15 +351,28 @@ void main() { vec3 total_light = vec3(0.0); float total_density = params.base_density; +#ifdef MOLTENVK_USED + uint local_density = density_only_map[lpos]; +#else uint local_density = imageLoad(density_only_map, pos).x; +#endif + total_density += float(int(local_density)) / DENSITY_SCALE; total_density = max(0.0, total_density); +#ifdef MOLTENVK_USED + uint scattering_u = light_only_map[lpos]; +#else uint scattering_u = imageLoad(light_only_map, pos).x; +#endif vec3 scattering = vec3(scattering_u >> 21, (scattering_u << 11) >> 21, scattering_u % 1024) / vec3(2047.0, 2047.0, 1023.0); scattering += params.base_scattering * params.base_density; +#ifdef MOLTENVK_USED + uint emission_u = emissive_only_map[lpos]; +#else uint emission_u = imageLoad(emissive_only_map, pos).x; +#endif vec3 emission = vec3(emission_u >> 21, (emission_u << 11) >> 21, emission_u % 1024) / vec3(511.0, 511.0, 255.0); emission += params.base_emission * params.base_density; @@ -673,10 +702,16 @@ void main() { final_density = mix(final_density, reprojected_density, reproject_amount); imageStore(density_map, pos, final_density); +#ifdef MOLTENVK_USED + density_only_map[lpos] = 0; + light_only_map[lpos] = 0; + emissive_only_map[lpos] = 0; +#else imageStore(density_only_map, pos, uvec4(0)); imageStore(light_only_map, pos, uvec4(0)); imageStore(emissive_only_map, pos, uvec4(0)); #endif +#endif #ifdef MODE_FOG |