Diffstat (limited to 'drivers/gles3/shaders/ssao.glsl')
 drivers/gles3/shaders/ssao.glsl | 247 +++
 1 file changed, 247 insertions(+), 0 deletions(-)
diff --git a/drivers/gles3/shaders/ssao.glsl b/drivers/gles3/shaders/ssao.glsl
new file mode 100644
index 0000000000..75f49ef37a
--- /dev/null
+++ b/drivers/gles3/shaders/ssao.glsl
@@ -0,0 +1,247 @@
+[vertex]
+
+
+layout(location = 0) in highp vec4 vertex_attrib;
+
+void main() {
+
+ gl_Position = vertex_attrib;
+ gl_Position.z = 1.0;
+}
+
+[fragment]
+
+
+#define NUM_SAMPLES (11)
+
+// If using depth mip levels, the log of the maximum pixel offset before we need to switch to a lower
+// miplevel to maintain reasonable spatial locality in the cache
+// If this number is too small (< 3), too many taps will land in the same pixel, and we'll get bad variance that manifests as flashing.
+// If it is too high (> 5), we'll get bad performance because we're not using the MIP levels effectively
+#define LOG_MAX_OFFSET (3)
+
+// This must be less than or equal to the MAX_MIP_LEVEL defined in SSAO.cpp
+#define MAX_MIP_LEVEL (4)
+
+// This is the number of turns around the circle that the spiral pattern makes. This should be prime to prevent
+// taps from lining up. The reference implementation tuned this choice for NUM_SAMPLES == 9; since 7 is prime it
+// still avoids aligned taps with the NUM_SAMPLES == 11 used above.
+#define NUM_SPIRAL_TURNS (7)
+
+
+uniform sampler2D source_depth; //texunit:0
+uniform usampler2D source_depth_mipmaps; //texunit:1
+uniform sampler2D source_normal; //texunit:2
+
+uniform ivec2 screen_size;
+uniform float camera_z_far;
+uniform float camera_z_near;
+
+uniform float intensity_div_r6;
+uniform float radius;
+
+#ifdef ENABLE_RADIUS2
+uniform float intensity_div_r62;
+uniform float radius2;
+#endif
+
+uniform float bias;
+uniform float proj_scale;
+
+layout(location = 0) out float visibility;
+
+uniform vec4 proj_info;
+
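+// proj_info packs the scale/offset needed to turn a pixel coordinate plus a linear view-space
+// depth back into a camera-space XY position (see reconstructCSPosition below). It is presumably
+// filled in from the current projection matrix and viewport size by the renderer; the exact
+// packing is defined on the CPU side, not in this shader.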
+vec3 reconstructCSPosition(vec2 S, float z) {
+ return vec3((S.xy * proj_info.xy + proj_info.zw) * z, z);
+}
+
+vec3 getPosition(ivec2 ssP) {
+ vec3 P;
+ P.z = texelFetch(source_depth, ssP, 0).r;
+
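+ // The depth buffer stores a non-linear value in [0,1]. Map it back to NDC (-1..1), then
+ // invert the standard OpenGL projection to recover linear view-space depth:
+ //   z_eye = 2*n*f / (f + n - z_ndc*(f - n))
+ // View space looks down -Z, so the result is negated below.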
+ P.z = P.z * 2.0 - 1.0;
+ P.z = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - P.z * (camera_z_far - camera_z_near));
+ P.z = -P.z;
+
+ // Offset to pixel center
+ P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z);
+ return P;
+}
+
+/** Reconstructs the camera-space unit face normal from the camera-space position,
+    using screen-space derivatives of the reconstructed position. */
+vec3 reconstructCSFaceNormal(vec3 C) {
+ return normalize(cross(dFdy(C), dFdx(C)));
+}
+
+
+
+/** Returns a unit vector and a screen-space radius for the tap on a unit disk (the caller should scale by the actual disk radius) */
+vec2 tapLocation(int sampleNumber, float spinAngle, out float ssR){
+ // Radius relative to ssR
+ float alpha = (float(sampleNumber) + 0.5) * (1.0 / float(NUM_SAMPLES));
+ float angle = alpha * (float(NUM_SPIRAL_TURNS) * 6.28) + spinAngle;
+
+ ssR = alpha;
+ return vec2(cos(angle), sin(angle));
+}
+
+
+/** Read the camera-space position of the point at screen-space pixel ssP + unitOffset * ssR. Assumes length(unitOffset) == 1 */
+vec3 getOffsetPosition(ivec2 ssC, vec2 unitOffset, float ssR) {
+ // Derivation:
+ //   mipLevel = floor(log2(ssR / MAX_OFFSET)) = floor(log2(ssR)) - LOG_MAX_OFFSET,
+ // clamped to the depth mip chain that was actually generated.
+ int mipLevel = clamp(int(floor(log2(ssR))) - LOG_MAX_OFFSET, 0, MAX_MIP_LEVEL);
+
+ ivec2 ssP = ivec2(ssR * unitOffset) + ssC;
+
+ vec3 P;
+
+ // We need to divide by 2^mipLevel to read the appropriately scaled coordinate from a MIP-map.
+ // Manually clamp to the texture size because texelFetch bypasses the texture unit
+ ivec2 mipP = clamp(ssP >> mipLevel, ivec2(0), (screen_size >> mipLevel) - ivec2(1));
+
+
+ if (mipLevel < 1) {
+ //read from depth buffer
+ P.z = texelFetch(source_depth, mipP, 0).r;
+ P.z = P.z * 2.0 - 1.0;
+ P.z = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - P.z * (camera_z_far - camera_z_near));
+ P.z = -P.z;
+
+ } else {
+ //read from the minified depth mipmaps, which store linear depth as 16-bit unsigned
+ //values normalized by camera_z_far
+ uint d = texelFetch(source_depth_mipmaps, mipP, mipLevel - 1).r;
+ P.z = -(float(d) / 65535.0) * camera_z_far;
+ }
+
+
+ // Offset to pixel center
+ P = reconstructCSPosition(vec2(ssP) + vec2(0.5), P.z);
+
+ return P;
+}
+
+
+
+/** Compute the occlusion due to the sample with index \a tapIndex about the pixel at \a ssC that corresponds
+ to camera-space point \a C with unit normal \a n_C, using maximum screen-space sampling radius \a ssDiskRadius.
+
+ Note that units of H() in the HPG12 paper are meters, not
+ unitless. The whole falloff/sampling function is therefore
+ unitless. In this implementation the radius^6 normalization is folded into the
+ intensity_div_r6 uniform, which is precomputed on the CPU.
+
+ Four versions of the falloff function are implemented below.
+*/
+float sampleAO(in ivec2 ssC, in vec3 C, in vec3 n_C, in float ssDiskRadius, in float p_radius, in int tapIndex, in float randomPatternRotationAngle) {
+ // Offset on the unit disk, spun for this pixel
+ float ssR;
+ vec2 unitOffset = tapLocation(tapIndex, randomPatternRotationAngle, ssR);
+ ssR *= ssDiskRadius;
+
+ // The occluding point in camera space
+ vec3 Q = getOffsetPosition(ssC, unitOffset, ssR);
+
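+ // v points from the shaded point toward the potential occluder; vn below measures how far the
+ // sample rises above the tangent plane, while vv is its squared distance, used by the falloff.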
+ vec3 v = Q - C;
+
+ float vv = dot(v, v);
+ float vn = dot(v, n_C);
+
+ const float epsilon = 0.01;
+ // Squared sampling radius (not to be confused with the optional radius2 uniform used by the
+ // ENABLE_RADIUS2 second pass).
+ float radius_sq = p_radius * p_radius;
+
+ // A: From the HPG12 paper
+ // Note large epsilon to avoid overdarkening within cracks
+ //return float(vv < radius_sq) * max((vn - bias) / (epsilon + vv), 0.0) * radius_sq * 0.6;
+
+ // B: Smoother transition to zero (lowers contrast, smoothing out corners). [Recommended]
+ float f = max(radius_sq - vv, 0.0);
+ return f * f * f * max((vn - bias) / (epsilon + vv), 0.0);
+
+ // C: Medium contrast (which looks better at high radii), no division. Note that the
+ // contribution still falls off with radius^2, but we've adjusted the rate in a way that is
+ // more computationally efficient and happens to be aesthetically pleasing.
+ // return 4.0 * max(1.0 - vv * invRadius2, 0.0) * max(vn - bias, 0.0);
+
+ // D: Low contrast, no division operation
+ // return 2.0 * float(vv < radius * radius) * max(vn - bias, 0.0);
+}
+
+
+
+void main() {
+
+
+ // Pixel being shaded
+ ivec2 ssC = ivec2(gl_FragCoord.xy);
+
+ // Camera-space point being shaded
+ vec3 C = getPosition(ssC);
+
+/* if (C.z <= -camera_z_far*0.999) {
+ // We're on the skybox
+ visibility=1.0;
+ return;
+ }*/
+
+ //visibility=-C.z/camera_z_far;
+ //return;
+
+ //vec3 n_C = texelFetch(source_normal,ssC,0).rgb * 2.0 - 1.0;
+
+ vec3 n_C = reconstructCSFaceNormal(C);
+ n_C = -n_C;
+
+
+ // Hash function used in the HPG12 AlchemyAO paper
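+ // Each pixel gets its own starting rotation for the spiral of taps, so neighbouring pixels
+ // sample different directions; the resulting high-frequency noise is removed by the blur pass.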
+ float randomPatternRotationAngle = float((3 * ssC.x ^ ssC.y + ssC.x * ssC.y) * 10);
+
+ // Reconstruct normals from positions. These will lead to 1-pixel black lines
+ // at depth discontinuities, however the blur will wipe those out so they are not visible
+ // in the final image.
+
+ // Choose the screen-space sample radius
+ // proportional to the projected area of the sphere
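+ // proj_scale presumably converts a view-space length at unit distance into a length in pixels,
+ // so dividing by -C.z (the positive distance to the point) yields the projected radius in pixels.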
+ float ssDiskRadius = -proj_scale * radius / C.z;
+
+ float sum = 0.0;
+ for (int i = 0; i < NUM_SAMPLES; ++i) {
+ sum += sampleAO(ssC, C, n_C, ssDiskRadius, radius, i, randomPatternRotationAngle);
+ }
+
+ float A = max(0.0, 1.0 - sum * intensity_div_r6 * (5.0 / float(NUM_SAMPLES)));
+
+#ifdef ENABLE_RADIUS2
+
+ //go again for radius2
+ randomPatternRotationAngle = float((5 * ssC.x ^ ssC.y + ssC.x * ssC.y) * 11);
+
+ // Choose the screen-space sample radius
+ // proportional to the projected area of the sphere
+ ssDiskRadius = -proj_scale * radius2 / C.z;
+
+ sum = 0.0;
+ for (int i = 0; i < NUM_SAMPLES; ++i) {
+ sum += sampleAO(ssC, C, n_C, ssDiskRadius, radius2, i, randomPatternRotationAngle);
+ }
+
+ A = min(A, max(0.0, 1.0 - sum * intensity_div_r62 * (5.0 / float(NUM_SAMPLES))));
+#endif
+ // Bilateral box-filter over a quad for free, respecting depth edges
+ // (the difference that this makes is subtle)
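+ // dFdx/dFdy operate on 2x2 pixel quads, so dFdx(A) is the difference between this pixel's AO and
+ // its horizontal quad neighbour; subtracting half of it (signed by pixel parity) averages the two,
+ // giving a 2x2 box blur at no extra cost, skipped across depth discontinuities.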
+ if (abs(dFdx(C.z)) < 0.02) {
+ A -= dFdx(A) * (float(ssC.x & 1) - 0.5);
+ }
+ if (abs(dFdy(C.z)) < 0.02) {
+ A -= dFdy(A) * (float(ssC.y & 1) - 0.5);
+ }
+
+ visibility = A;
+
+}
+
+
+