summaryrefslogtreecommitdiff
path: root/servers/rendering/rasterizer_rd/shaders
diff options
context:
space:
mode:
Diffstat (limited to 'servers/rendering/rasterizer_rd/shaders')
-rw-r--r--servers/rendering/rasterizer_rd/shaders/SCsub3
-rw-r--r--servers/rendering/rasterizer_rd/shaders/copy.glsl42
-rw-r--r--servers/rendering/rasterizer_rd/shaders/particles.glsl262
-rw-r--r--servers/rendering/rasterizer_rd/shaders/particles_copy.glsl82
-rw-r--r--servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl2
-rw-r--r--servers/rendering/rasterizer_rd/shaders/sort.glsl203
6 files changed, 585 insertions, 9 deletions
diff --git a/servers/rendering/rasterizer_rd/shaders/SCsub b/servers/rendering/rasterizer_rd/shaders/SCsub
index 3aa863be98..9d531d63ad 100644
--- a/servers/rendering/rasterizer_rd/shaders/SCsub
+++ b/servers/rendering/rasterizer_rd/shaders/SCsub
@@ -37,3 +37,6 @@ if "RD_GLSL" in env["BUILDERS"]:
env.RD_GLSL("sdfgi_debug_probes.glsl")
env.RD_GLSL("volumetric_fog.glsl")
env.RD_GLSL("shadow_reduce.glsl")
+ env.RD_GLSL("particles.glsl")
+ env.RD_GLSL("particles_copy.glsl")
+ env.RD_GLSL("sort.glsl")
diff --git a/servers/rendering/rasterizer_rd/shaders/copy.glsl b/servers/rendering/rasterizer_rd/shaders/copy.glsl
index eb39c28fa9..e565bd8e3d 100644
--- a/servers/rendering/rasterizer_rd/shaders/copy.glsl
+++ b/servers/rendering/rasterizer_rd/shaders/copy.glsl
@@ -14,6 +14,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
#define FLAG_FLIP_Y (1 << 5)
#define FLAG_FORCE_LUMINANCE (1 << 6)
#define FLAG_COPY_ALL_SOURCE (1 << 7)
+#define FLAG_HIGH_QUALITY_GLOW (1 << 8)
layout(push_constant, binding = 1, std430) uniform Params {
ivec4 section;
@@ -116,17 +117,42 @@ void main() {
vec4 color = vec4(0.0);
if (bool(params.flags & FLAG_HORIZONTAL)) {
- ivec2 base_pos = (pos + params.section.xy) << 1;
+ ivec2 base_pos = ((pos + params.section.xy) << 1) + ivec2(1);
ivec2 section_begin = params.section.xy << 1;
ivec2 section_end = section_begin + (params.section.zw << 1);
- GLOW_ADD(ivec2(0, 0), 0.174938);
- GLOW_ADD(ivec2(1, 0), 0.165569);
- GLOW_ADD(ivec2(2, 0), 0.140367);
- GLOW_ADD(ivec2(3, 0), 0.106595);
- GLOW_ADD(ivec2(-1, 0), 0.165569);
- GLOW_ADD(ivec2(-2, 0), 0.140367);
- GLOW_ADD(ivec2(-3, 0), 0.106595);
+ if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) {
+ //Sample from two lines to capture single pixel features
+ GLOW_ADD(ivec2(0, 0), 0.152781);
+ GLOW_ADD(ivec2(1, 0), 0.144599);
+ GLOW_ADD(ivec2(2, 0), 0.122589);
+ GLOW_ADD(ivec2(3, 0), 0.093095);
+ GLOW_ADD(ivec2(4, 0), 0.063327);
+ GLOW_ADD(ivec2(-1, 0), 0.144599);
+ GLOW_ADD(ivec2(-2, 0), 0.122589);
+ GLOW_ADD(ivec2(-3, 0), 0.093095);
+ GLOW_ADD(ivec2(-4, 0), 0.063327);
+
+ GLOW_ADD(ivec2(0, 1), 0.152781);
+ GLOW_ADD(ivec2(1, 1), 0.144599);
+ GLOW_ADD(ivec2(2, 1), 0.122589);
+ GLOW_ADD(ivec2(3, 1), 0.093095);
+ GLOW_ADD(ivec2(4, 1), 0.063327);
+ GLOW_ADD(ivec2(-1, 1), 0.144599);
+ GLOW_ADD(ivec2(-2, 1), 0.122589);
+ GLOW_ADD(ivec2(-3, 1), 0.093095);
+ GLOW_ADD(ivec2(-4, 1), 0.063327);
+ color *= 0.5;
+ } else {
+ GLOW_ADD(ivec2(0, 0), 0.174938);
+ GLOW_ADD(ivec2(1, 0), 0.165569);
+ GLOW_ADD(ivec2(2, 0), 0.140367);
+ GLOW_ADD(ivec2(3, 0), 0.106595);
+ GLOW_ADD(ivec2(-1, 0), 0.165569);
+ GLOW_ADD(ivec2(-2, 0), 0.140367);
+ GLOW_ADD(ivec2(-3, 0), 0.106595);
+ }
+
color *= params.glow_strength;
} else {
ivec2 base_pos = pos + params.section.xy;
diff --git a/servers/rendering/rasterizer_rd/shaders/particles.glsl b/servers/rendering/rasterizer_rd/shaders/particles.glsl
new file mode 100644
index 0000000000..7cdedfcbfe
--- /dev/null
+++ b/servers/rendering/rasterizer_rd/shaders/particles.glsl
@@ -0,0 +1,262 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+#define SAMPLER_NEAREST_CLAMP 0
+#define SAMPLER_LINEAR_CLAMP 1
+#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2
+#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3
+#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4
+#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5
+#define SAMPLER_NEAREST_REPEAT 6
+#define SAMPLER_LINEAR_REPEAT 7
+#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8
+#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9
+#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10
+#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11
+
+/* SET 0: GLOBAL DATA */
+
+layout(set = 0, binding = 1) uniform sampler material_samplers[12];
+
+layout(set = 0, binding = 2, std430) restrict readonly buffer GlobalVariableData {
+ vec4 data[];
+}
+global_variables;
+
+/* Set 1: FRAME AND PARTICLE DATA */
+
+// a frame history is kept for trail deterministic behavior
+struct FrameParams {
+ bool emitting;
+ float system_phase;
+ float prev_system_phase;
+ uint cycle;
+
+ float explosiveness;
+ float randomness;
+ float time;
+ float delta;
+
+ uint random_seed;
+ uint pad[3];
+
+ mat4 emission_transform;
+};
+
+layout(set = 1, binding = 0, std430) restrict buffer FrameHistory {
+ FrameParams data[];
+}
+frame_history;
+
+struct ParticleData {
+ mat4 xform;
+ vec3 velocity;
+ bool is_active;
+ vec4 color;
+ vec4 custom;
+};
+
+layout(set = 1, binding = 1, std430) restrict buffer Particles {
+ ParticleData data[];
+}
+particles;
+
+/* SET 2: MATERIAL */
+
+#ifdef USE_MATERIAL_UNIFORMS
+layout(set = 2, binding = 0, std140) uniform MaterialUniforms{
+ /* clang-format off */
+MATERIAL_UNIFORMS
+ /* clang-format on */
+} material;
+#endif
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ float lifetime;
+ bool clear;
+ uint total_particles;
+ uint trail_size;
+ bool use_fractional_delta;
+ uint pad[3];
+}
+params;
+
+uint hash(uint x) {
+ x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b);
+ x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b);
+ x = (x >> uint(16)) ^ x;
+ return x;
+}
+
+/* clang-format off */
+
+COMPUTE_SHADER_GLOBALS
+
+/* clang-format on */
+
+void main() {
+ uint particle = gl_GlobalInvocationID.x;
+
+ if (particle >= params.total_particles * params.trail_size) {
+ return; //discard
+ }
+
+ uint index = particle / params.trail_size;
+ uint frame = (particle % params.trail_size);
+
+#define FRAME frame_history.data[frame]
+#define PARTICLE particles.data[particle]
+
+ bool apply_forces = true;
+ bool apply_velocity = true;
+ float local_delta = FRAME.delta;
+
+ float mass = 1.0;
+
+ float restart_phase = float(index) / float(params.total_particles);
+
+ if (FRAME.randomness > 0.0) {
+ uint seed = FRAME.cycle;
+ if (restart_phase >= FRAME.system_phase) {
+ seed -= uint(1);
+ }
+ seed *= uint(params.total_particles);
+ seed += uint(index);
+ float random = float(hash(seed) % uint(65536)) / 65536.0;
+ restart_phase += FRAME.randomness * random * 1.0 / float(params.total_particles);
+ }
+
+ restart_phase *= (1.0 - FRAME.explosiveness);
+
+ bool restart = false;
+
+ if (FRAME.system_phase > FRAME.prev_system_phase) {
+ // restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed
+
+ if (restart_phase >= FRAME.prev_system_phase && restart_phase < FRAME.system_phase) {
+ restart = true;
+ if (params.use_fractional_delta) {
+ local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
+ }
+ }
+
+ } else if (FRAME.delta > 0.0) {
+ if (restart_phase >= FRAME.prev_system_phase) {
+ restart = true;
+ if (params.use_fractional_delta) {
+ local_delta = (1.0 - restart_phase + FRAME.system_phase) * params.lifetime;
+ }
+
+ } else if (restart_phase < FRAME.system_phase) {
+ restart = true;
+ if (params.use_fractional_delta) {
+ local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
+ }
+ }
+ }
+
+ uint current_cycle = FRAME.cycle;
+
+ if (FRAME.system_phase < restart_phase) {
+ current_cycle -= uint(1);
+ }
+
+ uint particle_number = current_cycle * uint(params.total_particles) + particle;
+
+ if (restart) {
+ PARTICLE.is_active = FRAME.emitting;
+ }
+
+#ifdef ENABLE_KEEP_DATA
+ if (params.clear) {
+#else
+ if (params.clear || restart) {
+#endif
+ PARTICLE.color = vec4(1.0);
+ PARTICLE.custom = vec4(0.0);
+ PARTICLE.velocity = vec3(0.0);
+ if (!restart) {
+ PARTICLE.is_active = false;
+ }
+ PARTICLE.xform = mat4(
+ vec4(1.0, 0.0, 0.0, 0.0),
+ vec4(0.0, 1.0, 0.0, 0.0),
+ vec4(0.0, 0.0, 1.0, 0.0),
+ vec4(0.0, 0.0, 0.0, 1.0));
+ }
+
+ if (PARTICLE.is_active) {
+ /* clang-format off */
+
+COMPUTE_SHADER_CODE
+
+ /* clang-format on */
+ }
+
+#if !defined(DISABLE_VELOCITY)
+
+ if (PARTICLE.is_active) {
+ PARTICLE.xform[3].xyz += PARTICLE.velocity * local_delta;
+ }
+#endif
+
+#if 0
+ if (PARTICLE.is_active) {
+ //execute shader
+
+
+
+
+ //!defined(DISABLE_FORCE)
+
+ if (false) {
+ vec3 force = vec3(0.0);
+ for (int i = 0; i < attractor_count; i++) {
+ vec3 rel_vec = xform[3].xyz - attractors[i].pos;
+ float dist = length(rel_vec);
+ if (attractors[i].radius < dist)
+ continue;
+ if (attractors[i].eat_radius > 0.0 && attractors[i].eat_radius > dist) {
+ out_velocity_active.a = 0.0;
+ }
+
+ rel_vec = normalize(rel_vec);
+
+ float attenuation = pow(dist / attractors[i].radius, attractors[i].attenuation);
+
+ if (attractors[i].dir == vec3(0.0)) {
+ //towards center
+ force += attractors[i].strength * rel_vec * attenuation * mass;
+ } else {
+ force += attractors[i].strength * attractors[i].dir * attenuation * mass;
+ }
+ }
+
+ out_velocity_active.xyz += force * local_delta;
+ }
+
+#if !defined(DISABLE_VELOCITY)
+
+ if (true) {
+ xform[3].xyz += out_velocity_active.xyz * local_delta;
+ }
+#endif
+ } else {
+ xform = mat4(0.0);
+ }
+
+
+ xform = transpose(xform);
+
+ out_velocity_active.a = mix(0.0, 1.0, shader_active);
+
+ out_xform_1 = xform[0];
+ out_xform_2 = xform[1];
+ out_xform_3 = xform[2];
+#endif
+}
diff --git a/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl b/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl
new file mode 100644
index 0000000000..6c782b6045
--- /dev/null
+++ b/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl
@@ -0,0 +1,82 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+struct ParticleData {
+ mat4 xform;
+ vec3 velocity;
+ bool is_active;
+ vec4 color;
+ vec4 custom;
+};
+
+layout(set = 0, binding = 1, std430) restrict readonly buffer Particles {
+ ParticleData data[];
+}
+particles;
+
+layout(set = 0, binding = 2, std430) restrict writeonly buffer Transforms {
+ vec4 data[];
+}
+instances;
+
+#ifdef USE_SORT_BUFFER
+
+layout(set = 1, binding = 0, std430) restrict buffer SortBuffer {
+ vec2 data[];
+}
+sort_buffer;
+
+#endif // USE_SORT_BUFFER
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ vec3 sort_direction;
+ uint total_particles;
+}
+params;
+
+void main() {
+#ifdef MODE_FILL_SORT_BUFFER
+
+ uint particle = gl_GlobalInvocationID.x;
+ if (particle >= params.total_particles) {
+ return; //discard
+ }
+
+ sort_buffer.data[particle].x = dot(params.sort_direction, particles.data[particle].xform[3].xyz);
+ sort_buffer.data[particle].y = float(particle);
+#endif
+
+#ifdef MODE_FILL_INSTANCES
+
+ uint particle = gl_GlobalInvocationID.x;
+ uint write_offset = gl_GlobalInvocationID.x * (3 + 1 + 1); //xform + color + custom
+
+ if (particle >= params.total_particles) {
+ return; //discard
+ }
+
+#ifdef USE_SORT_BUFFER
+ particle = uint(sort_buffer.data[particle].y); //use index from sort buffer
+#endif
+
+ mat4 txform;
+
+ if (particles.data[particle].is_active) {
+ txform = transpose(particles.data[particle].xform);
+ } else {
+ txform = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); //zero scale, becomes invisible
+ }
+
+ instances.data[write_offset + 0] = txform[0];
+ instances.data[write_offset + 1] = txform[1];
+ instances.data[write_offset + 2] = txform[2];
+ instances.data[write_offset + 3] = particles.data[particle].color;
+ instances.data[write_offset + 4] = particles.data[particle].custom;
+
+#endif
+}
diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl
index d7d19897e3..dd0ca5c506 100644
--- a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl
+++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl
@@ -338,7 +338,7 @@ void main() {
continue; //was not initialized yet, ignore
}
- float q_dist = distance(posf, vec3(p.xyz));
+ float q_dist = distance(posf, vec3(q.xyz));
if (p.w == 0 || q_dist < p_dist) {
p = q; //just replace because current is unused
p_dist = q_dist;
diff --git a/servers/rendering/rasterizer_rd/shaders/sort.glsl b/servers/rendering/rasterizer_rd/shaders/sort.glsl
new file mode 100644
index 0000000000..e5ebb9c64b
--- /dev/null
+++ b/servers/rendering/rasterizer_rd/shaders/sort.glsl
@@ -0,0 +1,203 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+// Original version here:
+// https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11/blob/master/gpuparticles11/src/Shaders
+
+//
+// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+
+#define SORT_SIZE 512
+#define NUM_THREADS (SORT_SIZE / 2)
+#define INVERSION (16 * 2 + 8 * 3)
+#define ITERATIONS 1
+
+layout(local_size_x = NUM_THREADS, local_size_y = 1, local_size_z = 1) in;
+
+#ifndef MODE_SORT_STEP
+
+shared vec2 g_LDS[SORT_SIZE];
+
+#endif
+
+layout(set = 1, binding = 0, std430) restrict buffer SortBuffer {
+ vec2 data[];
+}
+sort_buffer;
+
+layout(push_constant, binding = 0, std430) uniform Params {
+ uint total_elements;
+ uint pad[3];
+ ivec4 job_params;
+}
+params;
+
+void main() {
+#ifdef MODE_SORT_BLOCK
+
+ uvec3 Gid = gl_WorkGroupID;
+ uvec3 DTid = gl_GlobalInvocationID;
+ uvec3 GTid = gl_LocalInvocationID;
+ uint GI = gl_LocalInvocationIndex;
+
+ int GlobalBaseIndex = int((Gid.x * SORT_SIZE) + GTid.x);
+ int LocalBaseIndex = int(GI);
+ int numElementsInThreadGroup = int(min(SORT_SIZE, params.total_elements - (Gid.x * SORT_SIZE)));
+
+ // Load shared data
+
+ int i;
+ for (i = 0; i < 2 * ITERATIONS; ++i) {
+ if (GI + i * NUM_THREADS < numElementsInThreadGroup)
+ g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
+ }
+
+ groupMemoryBarrier();
+ barrier();
+
+ // Bitonic sort
+ for (int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2) {
+ for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) {
+ for (i = 0; i < ITERATIONS; ++i) {
+ int tmp_index = int(GI + NUM_THREADS * i);
+ int index_low = tmp_index & (nMergeSubSize - 1);
+ int index_high = 2 * (tmp_index - index_low);
+ int index = index_high + index_low;
+
+ int nSwapElem = nMergeSubSize == nMergeSize >> 1 ? index_high + (2 * nMergeSubSize - 1) - index_low : index_high + nMergeSubSize + index_low;
+ if (nSwapElem < numElementsInThreadGroup) {
+ vec2 a = g_LDS[index];
+ vec2 b = g_LDS[nSwapElem];
+
+ if (a.x > b.x) {
+ g_LDS[index] = b;
+ g_LDS[nSwapElem] = a;
+ }
+ }
+ groupMemoryBarrier();
+ barrier();
+ }
+ }
+ }
+
+ // Store shared data
+ for (i = 0; i < 2 * ITERATIONS; ++i) {
+ if (GI + i * NUM_THREADS < numElementsInThreadGroup) {
+ sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS];
+ }
+ }
+
+#endif
+
+#ifdef MODE_SORT_STEP
+
+ uvec3 Gid = gl_WorkGroupID;
+ uvec3 GTid = gl_LocalInvocationID;
+
+ ivec4 tgp;
+
+ tgp.x = int(Gid.x) * 256;
+ tgp.y = 0;
+ tgp.z = int(params.total_elements);
+ tgp.w = min(512, max(0, tgp.z - int(Gid.x) * 512));
+
+ uint localID = int(tgp.x) + GTid.x; // calculate threadID within this sortable-array
+
+ uint index_low = localID & (params.job_params.x - 1);
+ uint index_high = 2 * (localID - index_low);
+
+ uint index = tgp.y + index_high + index_low;
+ uint nSwapElem = tgp.y + index_high + params.job_params.y + params.job_params.z * index_low;
+
+ if (nSwapElem < tgp.y + tgp.z) {
+ vec2 a = sort_buffer.data[index];
+ vec2 b = sort_buffer.data[nSwapElem];
+
+ if (a.x > b.x) {
+ sort_buffer.data[index] = b;
+ sort_buffer.data[nSwapElem] = a;
+ }
+ }
+
+#endif
+
+#ifdef MODE_SORT_INNER
+
+ uvec3 Gid = gl_WorkGroupID;
+ uvec3 DTid = gl_GlobalInvocationID;
+ uvec3 GTid = gl_LocalInvocationID;
+ uint GI = gl_LocalInvocationIndex;
+
+ ivec4 tgp;
+
+ tgp.x = int(Gid.x * 256);
+ tgp.y = 0;
+ tgp.z = int(params.total_elements.x);
+ tgp.w = int(min(512, max(0, params.total_elements - Gid.x * 512)));
+
+ int GlobalBaseIndex = int(tgp.y + tgp.x * 2 + GTid.x);
+ int LocalBaseIndex = int(GI);
+ int i;
+
+ // Load shared data
+ for (i = 0; i < 2; ++i) {
+ if (GI + i * NUM_THREADS < tgp.w)
+ g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
+ }
+
+ groupMemoryBarrier();
+ barrier();
+
+ // sort threadgroup shared memory
+ for (int nMergeSubSize = SORT_SIZE >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) {
+ int tmp_index = int(GI);
+ int index_low = tmp_index & (nMergeSubSize - 1);
+ int index_high = 2 * (tmp_index - index_low);
+ int index = index_high + index_low;
+
+ int nSwapElem = index_high + nMergeSubSize + index_low;
+
+ if (nSwapElem < tgp.w) {
+ vec2 a = g_LDS[index];
+ vec2 b = g_LDS[nSwapElem];
+
+ if (a.x > b.x) {
+ g_LDS[index] = b;
+ g_LDS[nSwapElem] = a;
+ }
+ }
+ groupMemoryBarrier();
+ barrier();
+ }
+
+ // Store shared data
+ for (i = 0; i < 2; ++i) {
+ if (GI + i * NUM_THREADS < tgp.w) {
+ sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS];
+ }
+ }
+
+#endif
+}