diff options
Diffstat (limited to 'drivers/gles3/shaders')
-rw-r--r-- | drivers/gles3/shaders/SCsub | 3 | ||||
-rw-r--r-- | drivers/gles3/shaders/effect_blur.glsl | 17 | ||||
-rw-r--r-- | drivers/gles3/shaders/scene.glsl | 4 | ||||
-rw-r--r-- | drivers/gles3/shaders/ssao.glsl | 247 | ||||
-rw-r--r-- | drivers/gles3/shaders/ssao_blur.glsl | 113 | ||||
-rw-r--r-- | drivers/gles3/shaders/ssao_minify.glsl | 55 |
6 files changed, 438 insertions, 1 deletions
diff --git a/drivers/gles3/shaders/SCsub b/drivers/gles3/shaders/SCsub index b5797e78b8..dd7ec45242 100644 --- a/drivers/gles3/shaders/SCsub +++ b/drivers/gles3/shaders/SCsub @@ -12,5 +12,8 @@ if env['BUILDERS'].has_key('GLES3_GLSL'): env.GLES3_GLSL('screen_space_reflection.glsl'); env.GLES3_GLSL('effect_blur.glsl'); env.GLES3_GLSL('subsurf_scattering.glsl'); + env.GLES3_GLSL('ssao.glsl'); + env.GLES3_GLSL('ssao_minify.glsl'); + env.GLES3_GLSL('ssao_blur.glsl'); diff --git a/drivers/gles3/shaders/effect_blur.glsl b/drivers/gles3/shaders/effect_blur.glsl index fc6de7f654..211b60ca2e 100644 --- a/drivers/gles3/shaders/effect_blur.glsl +++ b/drivers/gles3/shaders/effect_blur.glsl @@ -19,12 +19,22 @@ void main() { in vec2 uv_interp; uniform sampler2D source_color; //texunit:0 +#ifdef SSAO_MERGE +uniform sampler2D source_ssao; //texunit:1 +#endif + uniform float lod; uniform vec2 pixel_size; layout(location = 0) out vec4 frag_color; +#ifdef SSAO_MERGE + +uniform vec4 ssao_color; + +#endif + void main() { @@ -52,6 +62,13 @@ void main() { frag_color = color; #endif +#ifdef SSAO_MERGE + + vec4 color =textureLod( source_color, uv_interp,0.0); + float ssao =textureLod( source_ssao, uv_interp,0.0).r; + frag_color = vec4( mix(color.rgb,color.rgb*mix(ssao_color.rgb,vec3(1.0),ssao),color.a), 1.0 ); + +#endif } diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 230544c1c3..61e9e37d2b 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -76,6 +76,7 @@ layout(std140) uniform SceneData { //ubo:0 float reflection_multiplier; float subsurface_scatter_width; + float ambient_occlusion_affect_light; }; @@ -387,6 +388,7 @@ layout(std140) uniform SceneData { float reflection_multiplier; float subsurface_scatter_width; + float ambient_occlusion_affect_light; }; @@ -1223,7 +1225,7 @@ LIGHT_SHADER_CODE float max_ambient=max(ambient_light.r,max(ambient_light.g,ambient_light.b)); float 
max_diffuse=max(diffuse_light.r,max(diffuse_light.g,diffuse_light.b)); float total_ambient = max_ambient+max_diffuse+max_emission; - float ambient_scale = (total_ambient>0.0) ? max_ambient/total_ambient : 0.0; + float ambient_scale = (total_ambient>0.0) ? (max_ambient+ambient_occlusion_affect_light*max_diffuse)/total_ambient : 0.0; #endif //ENABLE_AO diffuse_buffer=vec4(emission+diffuse_light+ambient_light,ambient_scale); diff --git a/drivers/gles3/shaders/ssao.glsl b/drivers/gles3/shaders/ssao.glsl new file mode 100644 index 0000000000..75f49ef37a --- /dev/null +++ b/drivers/gles3/shaders/ssao.glsl @@ -0,0 +1,247 @@ +[vertex] + + +layout(location=0) in highp vec4 vertex_attrib; + +void main() { + + gl_Position = vertex_attrib; + gl_Position.z=1.0; +} + +[fragment] + + +#define NUM_SAMPLES (11) + +// If using depth mip levels, the log of the maximum pixel offset before we need to switch to a lower +// miplevel to maintain reasonable spatial locality in the cache +// If this number is too small (< 3), too many taps will land in the same pixel, and we'll get bad variance that manifests as flashing. +// If it is too high (> 5), we'll get bad performance because we're not using the MIP levels effectively +#define LOG_MAX_OFFSET (3) + +// This must be less than or equal to the MAX_MIP_LEVEL defined in SSAO.cpp +#define MAX_MIP_LEVEL (4) + +// This is the number of turns around the circle that the spiral pattern makes. This should be prime to prevent +// taps from lining up. 
This particular choice was tuned for NUM_SAMPLES == 9
+#define NUM_SPIRAL_TURNS (7)
+
+
+// Non-linear depth; getPosition()/getOffsetPosition() remap it with d*2-1 and linearize it.
+uniform sampler2D source_depth; //texunit:0
+// Unsigned-integer depth mip chain, read by getOffsetPosition() for mip levels >= 1.
+uniform usampler2D source_depth_mipmaps; //texunit:1
+// Only referenced by a commented-out fetch in main(); normals are reconstructed from depth instead.
+uniform sampler2D source_normal; //texunit:2
+
+uniform ivec2 screen_size;
+uniform float camera_z_far;
+uniform float camera_z_near;
+
+uniform float intensity_div_r6;
+uniform float radius;
+
+#ifdef ENABLE_RADIUS2
+uniform float intensity_div_r62;
+uniform float radius2;
+#endif
+
+uniform float bias;
+uniform float proj_scale;
+
+layout(location = 0) out float visibility;
+
+uniform vec4 proj_info;
+
+/** Rebuilds a camera-space position from pixel coordinate S and camera-space depth z:
+    x/y are (S * proj_info.xy + proj_info.zw) * z, and z is passed through unchanged. */
+vec3 reconstructCSPosition(vec2 S, float z) {
+	return vec3((S.xy * proj_info.xy + proj_info.zw) * z, z);
+}
+
+/** Camera-space position of the pixel ssP, read from the full-resolution depth buffer. */
+vec3 getPosition(ivec2 ssP) {
+	vec3 P;
+	P.z = texelFetch(source_depth, ssP, 0).r;
+
+	// Remap the stored depth to [-1, 1], linearize it with the near/far planes,
+	// then negate it so the resulting camera-space z has the opposite sign of the linear depth.
+	P.z = P.z * 2.0 - 1.0;
+	P.z = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - P.z * (camera_z_far - camera_z_near));
+	P.z = -P.z;
+
+	// Offset to pixel center
+	P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z);
+	return P;
+}
+
+/** Reconstructs a camera-space unit face normal from the camera-space position C,
+    using the screen-space derivatives of C. */
+vec3 reconstructCSFaceNormal(vec3 C) {
+	return normalize(cross(dFdy(C), dFdx(C)));
+}
+
+
+
+/** Returns a unit vector and a screen-space radius for the tap on a unit disk (the caller should scale by the actual disk radius) */
+vec2 tapLocation(int sampleNumber, float spinAngle, out float ssR){
+	// Radius relative to ssR.
+	// Every int operand is converted explicitly: GLSL ES 3.00 has no implicit
+	// int -> float conversion, so mixed expressions fail to compile there.
+	float alpha = (float(sampleNumber) + 0.5) * (1.0 / float(NUM_SAMPLES));
+	float angle = alpha * (float(NUM_SPIRAL_TURNS) * 6.28) + spinAngle;
+
+	ssR = alpha;
+	return vec2(cos(angle), sin(angle));
+}
+
+
+/** Read the camera-space position of the point at screen-space pixel ssP + unitOffset * ssR.
Assumes length(unitOffset) == 1 */
+vec3 getOffsetPosition(ivec2 ssC, vec2 unitOffset, float ssR) {
+	// Derivation:
+	//  mipLevel = floor(log(ssR / MAX_OFFSET));
+	// log2() is undefined for arguments <= 0 (e.g. when the radius uniform is 0), so ssR is
+	// floored at 1.0 first. Any ssR below 1.0 was already clamped to mip level 0, so the
+	// selected level is unchanged for every positive ssR.
+	int mipLevel = clamp(int(floor(log2(max(ssR, 1.0)))) - LOG_MAX_OFFSET, 0, MAX_MIP_LEVEL);
+
+	ivec2 ssP = ivec2(ssR * unitOffset) + ssC;
+
+	vec3 P;
+
+	// We need to divide by 2^mipLevel to read the appropriately scaled coordinate from a MIP-map.
+	// Manually clamp to the texture size because texelFetch bypasses the texture unit
+	ivec2 mipP = clamp(ssP >> mipLevel, ivec2(0), (screen_size >> mipLevel) - ivec2(1));
+
+
+	if (mipLevel < 1) {
+		//read from depth buffer: same remap + linearization as getPosition()
+		P.z = texelFetch(source_depth, mipP, 0).r;
+		P.z = P.z * 2.0 - 1.0;
+		P.z = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - P.z * (camera_z_far - camera_z_near));
+		P.z = -P.z;
+
+	} else {
+		//read from mipmaps: the stored value is scaled by 1/65535 and by camera_z_far.
+		//Level 0 of source_depth_mipmaps is addressed with mipLevel 1, hence the -1.
+		uint d = texelFetch(source_depth_mipmaps, mipP, mipLevel-1).r;
+		P.z = -(float(d)/65535.0)*camera_z_far;
+	}
+
+
+	// Offset to pixel center
+	P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z);
+
+	return P;
+}
+
+
+
+/** Compute the occlusion due to sample with index \a tapIndex about the pixel at \a ssC that corresponds
+    to camera-space point \a C with unit normal \a n_C, using maximum screen-space sampling radius \a ssDiskRadius
+
+    Note that units of H() in the HPG12 paper are meters, not
+    unitless.  The whole falloff/sampling function is therefore
+    unitless.  In this implementation, we factor out (9 / radius).
+
+    Four versions of the falloff function are implemented below
+*/
+float sampleAO(in ivec2 ssC, in vec3 C, in vec3 n_C, in float ssDiskRadius,in float p_radius, in int tapIndex, in float randomPatternRotationAngle) {
+	// Offset on the unit disk, spun for this pixel
+	float ssR;
+	vec2 unitOffset = tapLocation(tapIndex, randomPatternRotationAngle, ssR);
+	ssR *= ssDiskRadius;
+
+	// The occluding point in camera space
+	vec3 Q = getOffsetPosition(ssC, unitOffset, ssR);
+
+	vec3 v = Q - C;
+
+	float vv = dot(v, v);
+	float vn = dot(v, n_C);
+
+	const float epsilon = 0.01;
+	// Named radius_sq rather than radius2 so it does not hide the radius2 uniform
+	// that exists when ENABLE_RADIUS2 is defined.
+	float radius_sq = p_radius*p_radius;
+
+	// A: From the HPG12 paper
+	// Note large epsilon to avoid overdarkening within cracks
+	//return float(vv < radius_sq) * max((vn - bias) / (epsilon + vv), 0.0) * radius_sq * 0.6;
+
+	// B: Smoother transition to zero (lowers contrast, smoothing out corners). [Recommended]
+	float f=max(radius_sq - vv, 0.0);
+	return f * f * f * max((vn - bias) / (epsilon + vv), 0.0);
+
+	// C: Medium contrast (which looks better at high radii), no division.  Note that the
+	// contribution still falls off with radius^2, but we've adjusted the rate in a way that is
+	// more computationally efficient and happens to be aesthetically pleasing.
+	// return 4.0 * max(1.0 - vv * invRadius2, 0.0) * max(vn - bias, 0.0);
+
+	// D: Low contrast, no division operation
+	// return 2.0 * float(vv < radius * radius) * max(vn - bias, 0.0);
+}
+
+
+
+/** Writes the ambient-occlusion visibility term for the current fragment to `visibility`.
+    NUM_SAMPLES taps are accumulated with sampleAO() for `radius` and, when ENABLE_RADIUS2
+    is defined, again for `radius2`; the smaller of the two visibilities is kept. */
+void main() {
+
+
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_FragCoord.xy);
+
+	// Camera-space point being shaded
+	vec3 C = getPosition(ssC);
+
+/*	if (C.z <= -camera_z_far*0.999) {
+	   // We're on the skybox
+	   visibility=1.0;
+	   return;
+	}*/
+
+	//visibility=-C.z/camera_z_far;
+	//return;
+
+	//vec3 n_C = texelFetch(source_normal,ssC,0).rgb * 2.0 - 1.0;
+
+	// Reconstruct normals from positions. These will lead to 1-pixel black lines
+	// at depth discontinuities, however the blur will wipe those out so they are not visible
+	// in the final image.
+	vec3 n_C = reconstructCSFaceNormal(C);
+	n_C = -n_C;
+
+
+	// Hash function used in the HPG12 AlchemyAO paper.
+	// '^' binds more loosely than '+', so the parentheses below only spell out the
+	// grouping the expression already had. The integer hash is converted explicitly
+	// because GLSL ES 3.00 has no implicit int -> float conversion.
+	float randomPatternRotationAngle = float(((3 * ssC.x) ^ (ssC.y + ssC.x * ssC.y)) * 10);
+
+	// Choose the screen-space sample radius
+	// proportional to the projected area of the sphere
+	float ssDiskRadius = -proj_scale * radius / C.z;
+
+	float sum = 0.0;
+	for (int i = 0; i < NUM_SAMPLES; ++i) {
+		sum += sampleAO(ssC, C, n_C, ssDiskRadius, radius,i, randomPatternRotationAngle);
+	}
+
+	float A = max(0.0, 1.0 - sum * intensity_div_r6 * (5.0 / float(NUM_SAMPLES)));
+
+#ifdef ENABLE_RADIUS2
+
+	//go again for radius2, with a different rotation hash
+	randomPatternRotationAngle = float(((5 * ssC.x) ^ (ssC.y + ssC.x * ssC.y)) * 11);
+
+	// Choose the screen-space sample radius
+	// proportional to the projected area of the sphere
+	ssDiskRadius = -proj_scale * radius2 / C.z;
+
+	sum = 0.0;
+	for (int i = 0; i < NUM_SAMPLES; ++i) {
+		sum += sampleAO(ssC, C, n_C, ssDiskRadius,radius2, i, randomPatternRotationAngle);
+	}
+
+	A= min(A,max(0.0, 1.0 - sum * intensity_div_r62 * (5.0 / float(NUM_SAMPLES))));
+#endif
+	// Bilateral box-filter over a quad for free, respecting depth edges
+	// (the difference that this makes is subtle)
+	if (abs(dFdx(C.z)) < 0.02) {
+		A -= dFdx(A) * (float(ssC.x & 1) - 0.5);
+	}
+	if (abs(dFdy(C.z)) < 0.02) {
+		A -= dFdy(A) * (float(ssC.y & 1) - 0.5);
+	}
+
+	visibility = A;
+
+}
+
+
+
diff --git a/drivers/gles3/shaders/ssao_blur.glsl b/drivers/gles3/shaders/ssao_blur.glsl
new file mode 100644
index 0000000000..31f3841a2a
--- /dev/null
+++ b/drivers/gles3/shaders/ssao_blur.glsl
@@ -0,0 +1,113 @@
+[vertex]
+
+
+layout(location=0) in highp vec4 vertex_attrib;
+
+
+// Passes the vertex through unchanged except for z, which is forced to 1.0.
+void main() {
+
+	gl_Position = vertex_attrib;
+	gl_Position.z=1.0;
+}
+
+[fragment]
+
+
+uniform sampler2D source_ssao; //texunit:0
+uniform sampler2D source_depth; //texunit:1
+
+
+layout(location = 0) out float visibility;
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+// Tunable Parameters:
+
+/** Increase to make depth edges crisper. Decrease to reduce flicker. */
+#define EDGE_SHARPNESS (1.0)
+
+/** Step in 2-pixel intervals since we already blurred against neighbors in the
+    first AO pass.  This constant can be increased while R decreases to improve
+    performance at the expense of some dithering artifacts.
+
+    Morgan found that a scale of 3 left a 1-pixel checkerboard grid that was
+    unobjectionable after shading was applied but eliminated most temporal incoherence
+    from using small numbers of sample taps.
+    */
+#define SCALE (3)
+
+/** Filter radius in pixels. This will be multiplied by SCALE.
*/
+#define R (4)
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Gaussian coefficients (R + 1 entries, indexed by abs(tap offset))
+const float gaussian[R + 1] =
+//	float[](0.356642, 0.239400, 0.072410, 0.009869);
+//	float[](0.398943, 0.241971, 0.053991, 0.004432, 0.000134);  // stddev = 1.0
+	float[](0.153170, 0.144893, 0.122649, 0.092902, 0.062970);  // stddev = 2.0
+//	float[](0.111220, 0.107798, 0.098151, 0.083953, 0.067458, 0.050920, 0.036108); // stddev = 3.0
+
+/** (1, 0) or (0, 1)*/
+uniform ivec2 axis;
+
+uniform float camera_z_far;
+uniform float camera_z_near;
+
+/** One pass of a separable, depth-aware blur of source_ssao along `axis`.
+    Each tap is weighted by a gaussian term and by how close its linearized depth
+    is to the depth of the center pixel. */
+void main() {
+
+	ivec2 ssC = ivec2(gl_FragCoord.xy);
+
+	// Last valid texel. texelFetch() with out-of-range coordinates returns undefined
+	// values, and taps reach R * SCALE pixels past the center, so they are clamped.
+	// NOTE(review): assumes source_depth has the same size as source_ssao (both are
+	// already addressed with the same ssC) - confirm against the caller.
+	ivec2 clamp_limit = textureSize(source_ssao, 0) - ivec2(1);
+
+	float depth = texelFetch(source_depth, ssC, 0).r;
+
+	// Linearize the center depth and normalize it by the far plane.
+	depth = depth * 2.0 - 1.0;
+	depth = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - depth * (camera_z_far - camera_z_near));
+
+	float depth_divide = 1.0 / camera_z_far;
+
+	depth*=depth_divide;
+
+	//if (depth > camera_z_far*0.999) {
+	//	discard;//skybox
+	//}
+
+	float sum = texelFetch(source_ssao, ssC, 0).r;
+
+	// Base weight for depth falloff.  Increase this for more blurriness,
+	// decrease it for better edge discrimination
+	float BASE = gaussian[0];
+	float totalWeight = BASE;
+	sum *= totalWeight;
+
+
+	for (int r = -R; r <= R; ++r) {
+		// We already handled the zero case above.  This loop should be unrolled and the static branch optimized out,
+		// so the IF statement has no runtime cost
+		if (r != 0) {
+
+			ivec2 ppos = clamp(ssC + axis * (r * SCALE), ivec2(0), clamp_limit);
+			float value = texelFetch(source_ssao, ppos, 0).r;
+			// The depth must come from the tap position, not the center pixel:
+			// fetching it at ssC made temp_depth == depth, so the bilateral
+			// weight below was always 1 and depth edges were blurred across.
+			float temp_depth = texelFetch(source_depth, ppos, 0).r;
+
+			temp_depth = temp_depth * 2.0 - 1.0;
+			temp_depth = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - temp_depth * (camera_z_far - camera_z_near));
+			temp_depth *= depth_divide;
+
+			// spatial domain: offset gaussian tap
+			float weight = 0.3 + gaussian[abs(r)];
+
+			// range domain (the "bilateral" weight). As depth difference increases, decrease weight.
+			weight *= max(0.0, 1.0
+				- (EDGE_SHARPNESS * 2000.0) * abs(temp_depth - depth)
+				);
+
+			sum += value * weight;
+			totalWeight += weight;
+		}
+	}
+
+	// epsilon keeps the division well-defined
+	const float epsilon = 0.0001;
+	visibility = sum / (totalWeight + epsilon);
+}
diff --git a/drivers/gles3/shaders/ssao_minify.glsl b/drivers/gles3/shaders/ssao_minify.glsl
new file mode 100644
index 0000000000..df9045c28a
--- /dev/null
+++ b/drivers/gles3/shaders/ssao_minify.glsl
@@ -0,0 +1,55 @@
+[vertex]
+
+
+layout(location=0) in highp vec4 vertex_attrib;
+
+// Passes the vertex through unchanged.
+void main() {
+
+	gl_Position = vertex_attrib;
+}
+
+[fragment]
+
+
+// With MINIFY_START the source is a float depth texture that gets linearized;
+// otherwise it is an unsigned-integer texture that is copied through as-is.
+#ifdef MINIFY_START
+
+#define SDEPTH_TYPE highp sampler2D
+uniform float camera_z_far;
+uniform float camera_z_near;
+
+#else
+
+#define SDEPTH_TYPE mediump usampler2D
+
+#endif
+
+uniform SDEPTH_TYPE source_depth; //texunit:0
+
+uniform ivec2 from_size;
+uniform int source_mipmap;
+
+layout(location = 0) out mediump uint depth;
+
+/** Writes one texel of the half-resolution depth level: picks one of the source texels
+    under this fragment, clamped to from_size, from mip level source_mipmap. */
+void main() {
+
+
+	ivec2 ssP = ivec2(gl_FragCoord.xy);
+
+  // Rotated grid subsampling to avoid XY directional bias or Z precision bias while downsampling.
+  // On DX9, the bit-and can be implemented with floating-point modulo
+
+#ifdef MINIFY_START
+	float fdepth = texelFetch(source_depth, clamp(ssP * 2 + ivec2(ssP.y & 1, ssP.x & 1), ivec2(0), from_size - ivec2(1)), source_mipmap).r;
+	// Linearize, normalize by the far plane, then quantize to the 0..65535 range.
+	fdepth = fdepth * 2.0 - 1.0;
+	fdepth = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - fdepth * (camera_z_far - camera_z_near));
+	fdepth /= camera_z_far;
+	// 65535.0 (not 65535): GLSL ES 3.00 has no implicit int -> float conversion.
+	depth = uint(clamp(fdepth*65535.0,0.0,65535.0));
+
+#else
+	depth = texelFetch(source_depth, clamp(ssP * 2 + ivec2(ssP.y & 1, ssP.x & 1), ivec2(0), from_size - ivec2(1)), source_mipmap).r;
+#endif
+
+
+}
+
+
+|