diff options
Diffstat (limited to 'servers/rendering/rasterizer_rd')
17 files changed, 2520 insertions, 82 deletions
diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp index b3279f041f..527ed09584 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.cpp @@ -389,7 +389,7 @@ void RasterizerEffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RD::get_singleton()->compute_list_end(); } -void RasterizerEffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Size2i &p_size, float p_strength, bool p_first_pass, float p_luminance_cap, float p_exposure, float p_bloom, float p_hdr_bleed_treshold, float p_hdr_bleed_scale, RID p_auto_exposure, float p_auto_exposure_grey) { +void RasterizerEffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Size2i &p_size, float p_strength, bool p_high_quality, bool p_first_pass, float p_luminance_cap, float p_exposure, float p_bloom, float p_hdr_bleed_treshold, float p_hdr_bleed_scale, RID p_auto_exposure, float p_auto_exposure_grey) { zeromem(&copy.push_constant, sizeof(CopyPushConstant)); CopyMode copy_mode = p_first_pass && p_auto_exposure.is_valid() ? 
COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE : COPY_MODE_GAUSSIAN_GLOW; @@ -415,12 +415,12 @@ void RasterizerEffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_texture, RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[copy_mode]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_back_texture), 3); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_texture), 3); if (p_auto_exposure.is_valid() && p_first_pass) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_auto_exposure), 1); } - copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0); + copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0) | (p_high_quality ? 
COPY_FLAG_HIGH_QUALITY_GLOW : 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); @@ -430,8 +430,8 @@ void RasterizerEffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_texture, //VERTICAL RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[copy_mode]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_back_texture), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_texture), 3); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_texture), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_back_texture), 3); copy.push_constant.flags = base_flags; RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); @@ -1273,6 +1273,76 @@ void RasterizerEffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, cons RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); } } + +void RasterizerEffectsRD::sort_buffer(RID p_uniform_set, int p_size) { + Sort::PushConstant push_constant = {}; /* value-initialize: only total_elements is assigned before the first push, so pad[] and job_params[] would otherwise be sent uninitialized */ + push_constant.total_elements = p_size; + + bool done = true; + + int numThreadGroups = ((p_size - 1) >> 9) + 1; + + if (numThreadGroups > 1) { + done = false; + } + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sort.pipelines[SORT_MODE_BLOCK]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, p_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(Sort::PushConstant)); + 
RD::get_singleton()->compute_list_dispatch(compute_list, numThreadGroups, 1, 1); + + int presorted = 512; + + while (!done) { + RD::get_singleton()->compute_list_add_barrier(compute_list); + + done = true; + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sort.pipelines[SORT_MODE_STEP]); + + numThreadGroups = 0; + + if (p_size > presorted) { + if (p_size > presorted * 2) { + done = false; + } + + int pow2 = presorted; + while (pow2 < p_size) { + pow2 *= 2; + } + numThreadGroups = pow2 >> 9; + } + + unsigned int nMergeSize = presorted * 2; + + for (unsigned int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 256; nMergeSubSize = nMergeSubSize >> 1) { + push_constant.job_params[0] = nMergeSubSize; + if (nMergeSubSize == nMergeSize >> 1) { + push_constant.job_params[1] = (2 * nMergeSubSize - 1); + push_constant.job_params[2] = -1; + } else { + push_constant.job_params[1] = nMergeSubSize; + push_constant.job_params[2] = 1; + } + push_constant.job_params[3] = 0; + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(Sort::PushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, numThreadGroups, 1, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sort.pipelines[SORT_MODE_INNER]); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(Sort::PushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, numThreadGroups, 1, 1); + + presorted *= 2; + } + + RD::get_singleton()->compute_list_end(); +} + RasterizerEffectsRD::RasterizerEffectsRD() { { // Initialize copy Vector<String> copy_modes; @@ -1618,6 +1688,21 @@ RasterizerEffectsRD::RasterizerEffectsRD() { } } + { + Vector<String> sort_modes; + sort_modes.push_back("\n#define MODE_SORT_BLOCK\n"); + sort_modes.push_back("\n#define MODE_SORT_STEP\n"); + sort_modes.push_back("\n#define MODE_SORT_INNER\n"); 
+ + sort.shader.initialize(sort_modes); + + sort.shader_version = sort.shader.version_create(); + + for (int i = 0; i < SORT_MODE_MAX; i++) { + sort.pipelines[i] = RD::get_singleton()->compute_pipeline_create(sort.shader.version_get_shader(sort.shader_version, i)); + } + } + RD::SamplerState sampler; sampler.mag_filter = RD::SAMPLER_FILTER_LINEAR; sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; diff --git a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h index b0964f23e7..e434bbc372 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_effects_rd.h @@ -47,6 +47,7 @@ #include "servers/rendering/rasterizer_rd/shaders/screen_space_reflection_filter.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/screen_space_reflection_scale.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/shadow_reduce.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/sort.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/specular_merge.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/ssao.glsl.gen.h" #include "servers/rendering/rasterizer_rd/shaders/ssao_blur.glsl.gen.h" @@ -81,7 +82,8 @@ class RasterizerEffectsRD { COPY_FLAG_GLOW_FIRST_PASS = (1 << 4), COPY_FLAG_FLIP_Y = (1 << 5), COPY_FLAG_FORCE_LUMINANCE = (1 << 6), - COPY_FLAG_ALL_SOURCE = (1 << 7) + COPY_FLAG_ALL_SOURCE = (1 << 7), + COPY_FLAG_HIGH_QUALITY_GLOW = (1 << 8) }; struct CopyPushConstant { @@ -544,9 +546,28 @@ class RasterizerEffectsRD { struct ShadowReduce { ShadowReduceShaderRD shader; RID shader_version; - RID pipelines[2]; + RID pipelines[SHADOW_REDUCE_MAX]; } shadow_reduce; + enum SortMode { + SORT_MODE_BLOCK, + SORT_MODE_STEP, + SORT_MODE_INNER, + SORT_MODE_MAX + }; + + struct Sort { + struct PushConstant { + uint32_t total_elements; + uint32_t pad[3]; + int32_t job_params[4]; + }; + + SortShaderRD shader; + RID shader_version; + RID 
pipelines[SORT_MODE_MAX]; + } sort; + RID default_sampler; RID default_mipmap_sampler; RID index_buffer; @@ -586,7 +607,7 @@ public: void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far); void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false); void gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst = false); - void gaussian_glow(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Size2i &p_size, float p_strength = 1.0, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_treshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0); + void gaussian_glow(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Size2i &p_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_treshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0); void cubemap_roughness(RID p_source_rd_texture, RID p_dest_framebuffer, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size); void make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const Size2i &p_size); @@ -649,6 +670,8 @@ public: void reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RenderingDevice::ComputeListID compute_list); void filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RS::EnvVolumetricFogShadowFilter p_filter, RenderingDevice::ComputeListID compute_list, bool p_vertical = 
true, bool p_horizontal = true); + void sort_buffer(RID p_uniform_set, int p_size); + RasterizerEffectsRD(); ~RasterizerEffectsRD(); }; diff --git a/servers/rendering/rasterizer_rd/rasterizer_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_rd.cpp index 18cf4fa340..509bd3ee73 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_rd.cpp @@ -90,7 +90,7 @@ void RasterizerRD::begin_frame(double frame_step) { void RasterizerRD::end_frame(bool p_swap_buffers) { #ifndef _MSC_VER -#warning TODO: likely passa bool to swap buffers to avoid display? +#warning TODO: likely pass a bool to swap buffers to avoid display? #endif RD::get_singleton()->swap_buffers(); //probably should pass some bool to avoid display? } diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp index 11abb8f4a8..c56c208098 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_high_end_rd.cpp @@ -782,8 +782,7 @@ void RasterizerSceneHighEndRD::_fill_instances(RenderList::Element **p_elements, for (int i = 0; i < p_element_count; i++) { const RenderList::Element *e = p_elements[i]; InstanceData &id = scene_state.instances[i]; - RasterizerStorageRD::store_transform(e->instance->transform, id.transform); - RasterizerStorageRD::store_transform(Transform(e->instance->transform.basis.inverse().transposed()), id.normal_transform); + bool store_transform = true; id.flags = 0; id.mask = e->instance->layer_mask; id.instance_uniforms_ofs = e->instance->instance_allocated_shader_parameters_offset >= 0 ? 
e->instance->instance_allocated_shader_parameters_offset : 0; @@ -807,12 +806,42 @@ void RasterizerSceneHighEndRD::_fill_instances(RenderList::Element **p_elements, } id.flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); + } else if (e->instance->base_type == RS::INSTANCE_PARTICLES) { + id.flags |= INSTANCE_DATA_FLAG_MULTIMESH; + uint32_t stride; + if (false) { // 2D particles + id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; + stride = 2; + } else { + stride = 3; + } + + id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; + stride += 1; + + id.flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; + stride += 1; + + id.flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT); + + if (!storage->particles_is_using_local_coords(e->instance->base)) { + store_transform = false; + } + } else if (e->instance->base_type == RS::INSTANCE_MESH) { if (e->instance->skeleton.is_valid()) { id.flags |= INSTANCE_DATA_FLAG_SKELETON; } } + if (store_transform) { + RasterizerStorageRD::store_transform(e->instance->transform, id.transform); + RasterizerStorageRD::store_transform(Transform(e->instance->transform.basis.inverse().transposed()), id.normal_transform); + } else { + RasterizerStorageRD::store_transform(Transform(), id.transform); + RasterizerStorageRD::store_transform(Transform(), id.normal_transform); + } + if (p_for_depth) { id.gi_offset = 0xFFFFFFFF; continue; @@ -967,7 +996,12 @@ void RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l ERR_CONTINUE(true); //should be a bug } break; case RS::INSTANCE_PARTICLES: { - ERR_CONTINUE(true); //should be a bug + RID mesh = storage->particles_get_draw_pass_mesh(e->instance->base, e->surface_index >> 16); + ERR_CONTINUE(!mesh.is_valid()); //should be a bug + primitive = storage->mesh_surface_get_primitive(mesh, e->surface_index & 0xFFFF); + + xforms_uniform_set = storage->particles_get_instance_buffer_uniform_set(e->instance->base, default_shader_rd, TRANSFORMS_UNIFORM_SET); + } 
break; default: { ERR_CONTINUE(true); //should be a bug @@ -1036,7 +1070,9 @@ void RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l ERR_CONTINUE(true); //should be a bug } break; case RS::INSTANCE_PARTICLES: { - ERR_CONTINUE(true); //should be a bug + RID mesh = storage->particles_get_draw_pass_mesh(e->instance->base, e->surface_index >> 16); + ERR_CONTINUE(!mesh.is_valid()); //should be a bug + storage->mesh_surface_get_arrays_and_format(mesh, e->surface_index & 0xFFFF, pipeline->get_vertex_input_mask(), vertex_array_rd, index_array_rd, vertex_format); } break; default: { ERR_CONTINUE(true); //should be a bug @@ -1092,6 +1128,8 @@ void RasterizerSceneHighEndRD::_render_list(RenderingDevice::DrawListID p_draw_l case RS::INSTANCE_IMMEDIATE: { } break; case RS::INSTANCE_PARTICLES: { + uint32_t instances = storage->particles_get_amount(e->instance->base); + RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instances); } break; default: { ERR_CONTINUE(true); //should be a bug @@ -1524,31 +1562,31 @@ void RasterizerSceneHighEndRD::_fill_render_list(InstanceBase **p_cull_result, i _add_geometry(immediate, inst, nullptr, -1, p_depth_pass, p_shadow_pass); } break; +#endif case RS::INSTANCE_PARTICLES: { + int draw_passes = storage->particles_get_draw_passes(inst->base); - RasterizerStorageGLES3::Particles *particles = storage->particles_owner.getornull(inst->base); - ERR_CONTINUE(!particles); - - for (int j = 0; j < particles->draw_passes.size(); j++) { - - RID pmesh = particles->draw_passes[j]; - if (!pmesh.is_valid()) + for (int j = 0; j < draw_passes; j++) { + RID mesh = storage->particles_get_draw_pass_mesh(inst->base, j); + if (!mesh.is_valid()) continue; - RasterizerStorageGLES3::Mesh *mesh = storage->mesh_owner.getornull(pmesh); - if (!mesh) - continue; //mesh not assigned - int ssize = mesh->surfaces.size(); + const RID *materials = nullptr; + uint32_t surface_count; - for (int k = 0; k < ssize; k++) { + materials = 
storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (!materials) { + continue; //nothing to do + } - RasterizerStorageGLES3::Surface *s = mesh->surfaces[k]; - _add_geometry(s, inst, particles, -1, p_depth_pass, p_shadow_pass); + for (uint32_t k = 0; k < surface_count; k++) { + uint32_t surface_index = storage->mesh_surface_get_particles_render_pass_index(mesh, j, render_pass, &geometry_index); /* NOTE(review): the second argument is the draw-pass index j; confirm whether this call expects the surface index k instead */ + _add_geometry(inst, (j << 16) | k, materials[k], p_pass_mode, surface_index, p_using_sdfgi); /* materials is returned together with surface_count and this loop walks k over surface_count, so it is indexed by the surface k; j is the draw-pass index packed into the upper 16 bits */ } } } break; -#endif + default: { } } @@ -2696,6 +2734,7 @@ RasterizerSceneHighEndRD::RasterizerSceneHighEndRD(RasterizerStorageRD *p_storag actions.renames["LIGHT_COLOR"] = "light_color"; actions.renames["LIGHT"] = "light"; actions.renames["ATTENUATION"] = "attenuation"; + actions.renames["SHADOW_ATTENUATION"] = "shadow_attenuation"; actions.renames["DIFFUSE_LIGHT"] = "diffuse_light"; actions.renames["SPECULAR_LIGHT"] = "specular_light"; diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp index 34818d2683..958d8eac1f 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.cpp @@ -2951,6 +2951,10 @@ void RasterizerSceneRD::environment_glow_set_use_bicubic_upscale(bool p_enable) glow_bicubic_upscale = p_enable; } +void RasterizerSceneRD::environment_glow_set_use_high_quality(bool p_enable) { + glow_high_quality = p_enable; +} + void RasterizerSceneRD::environment_set_sdfgi(RID p_env, bool p_enable, RS::EnvironmentSDFGICascades p_cascades, float p_min_cell_size, RS::EnvironmentSDFGIYScale p_y_scale, bool p_use_occlusion, bool p_use_multibounce, bool p_read_sky, float p_energy, float p_normal_bias, float p_probe_bias) { Environment *env = environment_owner.getornull(p_env); ERR_FAIL_COND(!env); @@ -5265,9 +5269,9 @@ void RasterizerSceneRD::_render_buffers_post_process_and_tonemap(RID p_render_bu if (env->auto_exposure && 
rb->luminance.current.is_valid()) { luminance_texture = rb->luminance.current; } - storage->get_effects()->gaussian_glow(rb->texture, rb->blur[0].mipmaps[i + 1].texture, rb->blur[1].mipmaps[i].texture, Size2i(vp_w, vp_h), env->glow_strength, true, env->glow_hdr_luminance_cap, env->exposure, env->glow_bloom, env->glow_hdr_bleed_threshold, env->glow_hdr_bleed_scale, luminance_texture, env->auto_exp_scale); + storage->get_effects()->gaussian_glow(rb->texture, rb->blur[0].mipmaps[i + 1].texture, rb->blur[1].mipmaps[i].texture, Size2i(vp_w, vp_h), env->glow_strength, glow_high_quality, true, env->glow_hdr_luminance_cap, env->exposure, env->glow_bloom, env->glow_hdr_bleed_threshold, env->glow_hdr_bleed_scale, luminance_texture, env->auto_exp_scale); } else { - storage->get_effects()->gaussian_glow(rb->blur[1].mipmaps[i - 1].texture, rb->blur[0].mipmaps[i + 1].texture, rb->blur[1].mipmaps[i].texture, Size2i(vp_w, vp_h), env->glow_strength); + storage->get_effects()->gaussian_glow(rb->blur[1].mipmaps[i - 1].texture, rb->blur[0].mipmaps[i + 1].texture, rb->blur[1].mipmaps[i].texture, Size2i(vp_w, vp_h), env->glow_strength, glow_high_quality); } } } @@ -8310,6 +8314,7 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) { screen_space_roughness_limiter_amount = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_amount"); screen_space_roughness_limiter_limit = GLOBAL_GET("rendering/quality/screen_filters/screen_space_roughness_limiter_limit"); glow_bicubic_upscale = int(GLOBAL_GET("rendering/quality/glow/upscale_mode")) > 0; + glow_high_quality = GLOBAL_GET("rendering/quality/glow/use_high_quality"); ssr_roughness_quality = RS::EnvironmentSSRRoughnessQuality(int(GLOBAL_GET("rendering/quality/screen_space_reflection/roughness_quality"))); sss_quality = RS::SubSurfaceScatteringQuality(int(GLOBAL_GET("rendering/quality/subsurface_scattering/subsurface_scattering_quality"))); sss_scale = 
GLOBAL_GET("rendering/quality/subsurface_scattering/subsurface_scattering_scale"); diff --git a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h index f504240f50..fe31d2f76b 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_scene_rd.h @@ -359,7 +359,7 @@ private: mutable RID_Owner<ReflectionProbeInstance> reflection_probe_instance_owner; - /* REFLECTION PROBE INSTANCE */ + /* DECAL INSTANCE */ struct DecalInstance { RID decal; @@ -765,6 +765,7 @@ private: RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; bool ssao_half_size = false; bool glow_bicubic_upscale = false; + bool glow_high_quality = false; RS::EnvironmentSSRRoughnessQuality ssr_roughness_quality = RS::ENV_SSR_ROUGNESS_QUALITY_LOW; static uint64_t auto_exposure_counter; @@ -1530,6 +1531,7 @@ public: void environment_set_glow(RID p_env, bool p_enable, int p_level_flags, float p_intensity, float p_strength, float p_mix, float p_bloom_threshold, RS::EnvironmentGlowBlendMode p_blend_mode, float p_hdr_bleed_threshold, float p_hdr_bleed_scale, float p_hdr_luminance_cap); void environment_glow_set_use_bicubic_upscale(bool p_enable); + void environment_glow_set_use_high_quality(bool p_enable); void environment_set_fog(RID p_env, bool p_enable, const Color &p_light_color, float p_light_energy, float p_sun_scatter, float p_density, float p_height, float p_height_density); bool environment_is_fog_enabled(RID p_env) const; @@ -1540,7 +1542,7 @@ public: float environment_get_fog_height(RID p_env) const; float environment_get_fog_height_density(RID p_env) const; - void environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, const Color &p_light, float p_light_energy, float p_lenght, float p_detail_spread, float p_gi_inject, RS::EnvVolumetricFogShadowFilter p_shadow_filter); + void environment_set_volumetric_fog(RID p_env, bool p_enable, float p_density, 
const Color &p_light, float p_light_energy, float p_length, float p_detail_spread, float p_gi_inject, RS::EnvVolumetricFogShadowFilter p_shadow_filter); virtual void environment_set_volumetric_fog_volume_size(int p_size, int p_depth); virtual void environment_set_volumetric_fog_filter_active(bool p_enable); diff --git a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp index f3ba57e733..a13e7d786b 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp +++ b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.cpp @@ -33,6 +33,7 @@ #include "core/engine.h" #include "core/io/resource_loader.h" #include "core/project_settings.h" +#include "rasterizer_rd.h" #include "servers/rendering/shader_language.h" Ref<Image> RasterizerStorageRD::_validate_texture_format(const Ref<Image> &p_image, TextureToRDFormat &r_format) { @@ -714,8 +715,120 @@ RID RasterizerStorageRD::texture_2d_layered_create(const Vector<Ref<Image>> &p_l return texture_owner.make_rid(texture); } -RID RasterizerStorageRD::texture_3d_create(const Vector<Ref<Image>> &p_slices) { - return RID(); +RID RasterizerStorageRD::texture_3d_create(Image::Format p_format, int p_width, int p_height, int p_depth, bool p_mipmaps, const Vector<Ref<Image>> &p_data) { + ERR_FAIL_COND_V(p_data.size() == 0, RID()); + Image::Image3DValidateError verr = Image::validate_3d_image(p_format, p_width, p_height, p_depth, p_mipmaps, p_data); + if (verr != Image::VALIDATE_3D_OK) { + ERR_FAIL_V_MSG(RID(), Image::get_3d_image_validation_error_text(verr)); + } + + TextureToRDFormat ret_format; + Image::Format validated_format = Image::FORMAT_MAX; + Vector<uint8_t> all_data; + uint32_t mipmap_count = 0; + Vector<Texture::BufferSlice3D> slices; + { + Vector<Ref<Image>> images; + uint32_t all_data_size = 0; + images.resize(p_data.size()); + for (int i = 0; i < p_data.size(); i++) { + TextureToRDFormat f; + images.write[i] = _validate_texture_format(p_data[i], 
f); + if (i == 0) { + ret_format = f; + validated_format = images[0]->get_format(); + } + + all_data_size += images[i]->get_data().size(); + } + + all_data.resize(all_data_size); //consolidate all data here + uint32_t offset = 0; + Size2i prev_size; + for (int i = 0; i < p_data.size(); i++) { + uint32_t s = images[i]->get_data().size(); + + copymem(&all_data.write[offset], images[i]->get_data().ptr(), s); + { + Texture::BufferSlice3D slice; + slice.size.width = images[i]->get_width(); + slice.size.height = images[i]->get_height(); + slice.offset = offset; + slice.buffer_size = s; + slices.push_back(slice); + } + offset += s; + + Size2i img_size(images[i]->get_width(), images[i]->get_height()); + if (img_size != prev_size) { + mipmap_count++; + } + prev_size = img_size; + } + } + + Texture texture; + + texture.type = Texture::TYPE_3D; + texture.width = p_width; + texture.height = p_height; + texture.depth = p_depth; + texture.mipmaps = mipmap_count; + texture.format = p_data[0]->get_format(); + texture.validated_format = validated_format; + + texture.buffer_size_3d = all_data.size(); + texture.buffer_slices_3d = slices; + + texture.rd_type = RD::TEXTURE_TYPE_3D; + texture.rd_format = ret_format.format; + texture.rd_format_srgb = ret_format.format_srgb; + + RD::TextureFormat rd_format; + RD::TextureView rd_view; + { //attempt register + rd_format.format = texture.rd_format; + rd_format.width = texture.width; + rd_format.height = texture.height; + rd_format.depth = texture.depth; + rd_format.array_layers = 1; + rd_format.mipmaps = texture.mipmaps; + rd_format.type = texture.rd_type; + rd_format.samples = RD::TEXTURE_SAMPLES_1; + rd_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_format.shareable_formats.push_back(texture.rd_format); + rd_format.shareable_formats.push_back(texture.rd_format_srgb); + } + } + { + rd_view.swizzle_r = 
ret_format.swizzle_r; + rd_view.swizzle_g = ret_format.swizzle_g; + rd_view.swizzle_b = ret_format.swizzle_b; + rd_view.swizzle_a = ret_format.swizzle_a; + } + Vector<Vector<uint8_t>> data_slices; + data_slices.push_back(all_data); //one slice + + texture.rd_texture = RD::get_singleton()->texture_create(rd_format, rd_view, data_slices); + ERR_FAIL_COND_V(texture.rd_texture.is_null(), RID()); + if (texture.rd_format_srgb != RD::DATA_FORMAT_MAX) { + rd_view.format_override = texture.rd_format_srgb; + texture.rd_texture_srgb = RD::get_singleton()->texture_create_shared(rd_view, texture.rd_texture); + if (texture.rd_texture_srgb.is_null()) { + RD::get_singleton()->free(texture.rd_texture); + ERR_FAIL_COND_V(texture.rd_texture_srgb.is_null(), RID()); + } + } + + //used for 2D, overridable + texture.width_2d = texture.width; + texture.height_2d = texture.height; + texture.is_render_target = false; + texture.rd_view = rd_view; + texture.is_proxy = false; + + return texture_owner.make_rid(texture); } RID RasterizerStorageRD::texture_proxy_create(RID p_base) { @@ -771,7 +884,41 @@ void RasterizerStorageRD::texture_2d_update(RID p_texture, const Ref<Image> &p_i _texture_2d_update(p_texture, p_image, p_layer, false); } -void RasterizerStorageRD::texture_3d_update(RID p_texture, const Ref<Image> &p_image, int p_depth, int p_mipmap) { +void RasterizerStorageRD::texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data) { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND(!tex); + ERR_FAIL_COND(tex->type != Texture::TYPE_3D); + Image::Image3DValidateError verr = Image::validate_3d_image(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps > 1, p_data); + if (verr != Image::VALIDATE_3D_OK) { + ERR_FAIL_MSG(Image::get_3d_image_validation_error_text(verr)); + } + + Vector<uint8_t> all_data; + { + Vector<Ref<Image>> images; + uint32_t all_data_size = 0; + images.resize(p_data.size()); + for (int i = 0; i < p_data.size(); i++) { + Ref<Image> image 
= p_data[i]; + if (image->get_format() != tex->validated_format) { + image = image->duplicate(); + image->convert(tex->validated_format); + } + all_data_size += image->get_data().size(); /* slot i of images is still a null Ref at this point (the vector was only resized), so take the size from the local image */ + images.write[i] = image; /* store in the pre-sized slot; push_back would append after the null slots that the copy loop below reads as images[0..n-1] */ + } + + all_data.resize(all_data_size); //consolidate all data here + uint32_t offset = 0; + + for (int i = 0; i < p_data.size(); i++) { + uint32_t s = images[i]->get_data().size(); + copymem(&all_data.write[offset], images[i]->get_data().ptr(), s); + offset += s; + } + } + + RD::get_singleton()->texture_update(tex->rd_texture, 0, all_data, true); } void RasterizerStorageRD::texture_proxy_update(RID p_texture, RID p_proxy_to) { @@ -857,7 +1004,25 @@ RID RasterizerStorageRD::texture_2d_layered_placeholder_create(RS::TextureLayere } RID RasterizerStorageRD::texture_3d_placeholder_create() { - return RID(); + //this could be better optimized to reuse an existing image , done this way + //for now to get it working + Ref<Image> image; + image.instance(); + image->create(4, 4, false, Image::FORMAT_RGBA8); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + image->set_pixel(i, j, Color(1, 0, 1, 1)); + } + } + + Vector<Ref<Image>> images; + //cube + for (int i = 0; i < 4; i++) { + images.push_back(image); + } + + return texture_3d_create(Image::FORMAT_RGBA8, 4, 4, 4, false, images); } Ref<Image> RasterizerStorageRD::texture_2d_get(RID p_texture) const { @@ -889,11 +1054,51 @@ Ref<Image> RasterizerStorageRD::texture_2d_get(RID p_texture) const { } Ref<Image> RasterizerStorageRD::texture_2d_layer_get(RID p_texture, int p_layer) const { - return Ref<Image>(); + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND_V(!tex, Ref<Image>()); + + Vector<uint8_t> data = RD::get_singleton()->texture_get_data(tex->rd_texture, p_layer); + ERR_FAIL_COND_V(data.size() == 0, Ref<Image>()); + Ref<Image> image; + image.instance(); + image->create(tex->width, tex->height, tex->mipmaps > 1, tex->validated_format, data); + 
ERR_FAIL_COND_V(image->empty(), Ref<Image>()); + if (tex->format != tex->validated_format) { + image->convert(tex->format); + } + + return image; } -Ref<Image> RasterizerStorageRD::texture_3d_slice_get(RID p_texture, int p_depth, int p_mipmap) const { - return Ref<Image>(); +Vector<Ref<Image>> RasterizerStorageRD::texture_3d_get(RID p_texture) const { + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND_V(!tex, Vector<Ref<Image>>()); + ERR_FAIL_COND_V(tex->type != Texture::TYPE_3D, Vector<Ref<Image>>()); + + Vector<uint8_t> all_data = RD::get_singleton()->texture_get_data(tex->rd_texture, 0); + + ERR_FAIL_COND_V(all_data.size() != (int)tex->buffer_size_3d, Vector<Ref<Image>>()); + + Vector<Ref<Image>> ret; + + for (int i = 0; i < tex->buffer_slices_3d.size(); i++) { + const Texture::BufferSlice3D &bs = tex->buffer_slices_3d[i]; + ERR_FAIL_COND_V(bs.offset >= (uint32_t)all_data.size(), Vector<Ref<Image>>()); + ERR_FAIL_COND_V(bs.offset + bs.buffer_size > (uint32_t)all_data.size(), Vector<Ref<Image>>()); + Vector<uint8_t> sub_region = all_data.subarray(bs.offset, bs.offset + bs.buffer_size - 1); + + Ref<Image> img; + img.instance(); + img->create(bs.size.width, bs.size.height, false, tex->validated_format, sub_region); + ERR_FAIL_COND_V(img->empty(), Vector<Ref<Image>>()); + if (tex->format != tex->validated_format) { + img->convert(tex->format); + } + + ret.push_back(img); + } + + return ret; } void RasterizerStorageRD::texture_replace(RID p_texture, RID p_by_texture) { @@ -3003,9 +3208,9 @@ Vector<float> RasterizerStorageRD::multimesh_get_buffer(RID p_multimesh) const { Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer); Vector<float> ret; - ret.resize(multimesh->instances); + ret.resize(multimesh->instances * multimesh->stride_cache); { - float *w = multimesh->data_cache.ptrw(); + float *w = ret.ptrw(); const uint8_t *r = buffer.ptr(); copymem(w, r, buffer.size()); } @@ -3098,8 +3303,914 @@ void 
RasterizerStorageRD::_update_dirty_multimeshes() { multimesh_dirty_list = nullptr; } -/* SKELETON */ +/* PARTICLES */ + +RID RasterizerStorageRD::particles_create() { + return particles_owner.make_rid(Particles()); +} + +void RasterizerStorageRD::particles_set_emitting(RID p_particles, bool p_emitting) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->emitting = p_emitting; +} + +bool RasterizerStorageRD::particles_get_emitting(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, false); + + return particles->emitting; +} + +void RasterizerStorageRD::_particles_free_data(Particles *particles) { + if (!particles->particle_buffer.is_valid()) { + return; + } + RD::get_singleton()->free(particles->particle_buffer); + RD::get_singleton()->free(particles->frame_params_buffer); + RD::get_singleton()->free(particles->particle_instance_buffer); + particles->particles_transforms_buffer_uniform_set = RID(); + particles->particle_buffer = RID(); + + if (particles->particles_sort_buffer.is_valid()) { + RD::get_singleton()->free(particles->particles_sort_buffer); + particles->particles_sort_buffer = RID(); + } + + if (particles->emission_buffer != nullptr) { + particles->emission_buffer = nullptr; + particles->emission_buffer_data.clear(); + RD::get_singleton()->free(particles->emission_storage_buffer); + particles->emission_storage_buffer = RID(); + } +} + +void RasterizerStorageRD::particles_set_amount(RID p_particles, int p_amount) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + if (particles->amount == p_amount) { + return; + } + + _particles_free_data(particles); + + particles->amount = p_amount; + + if (particles->amount > 0) { + particles->particle_buffer = RD::get_singleton()->storage_buffer_create(sizeof(ParticleData) * p_amount); + particles->frame_params_buffer = 
RD::get_singleton()->storage_buffer_create(sizeof(ParticlesFrameParams) * 1); + particles->particle_instance_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * 4 * (3 + 1 + 1) * p_amount); + //needs to clear it + + { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(particles->particle_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(particles->particle_instance_buffer); + uniforms.push_back(u); + } + + particles->particles_copy_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, 0), 0); + } + } + + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; +} + +void RasterizerStorageRD::particles_set_lifetime(RID p_particles, float p_lifetime) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->lifetime = p_lifetime; +} + +void RasterizerStorageRD::particles_set_one_shot(RID p_particles, bool p_one_shot) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->one_shot = p_one_shot; +} + +void RasterizerStorageRD::particles_set_pre_process_time(RID p_particles, float p_time) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->pre_process_time = p_time; +} +void RasterizerStorageRD::particles_set_explosiveness_ratio(RID p_particles, float p_ratio) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->explosiveness = p_ratio; +} +void RasterizerStorageRD::particles_set_randomness_ratio(RID p_particles, float p_ratio) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + 
particles->randomness = p_ratio; +} + +void RasterizerStorageRD::particles_set_custom_aabb(RID p_particles, const AABB &p_aabb) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + particles->custom_aabb = p_aabb; + particles->instance_dependency.instance_notify_changed(true, false); +} + +void RasterizerStorageRD::particles_set_speed_scale(RID p_particles, float p_scale) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->speed_scale = p_scale; +} +void RasterizerStorageRD::particles_set_use_local_coordinates(RID p_particles, bool p_enable) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->use_local_coords = p_enable; +} + +void RasterizerStorageRD::particles_set_fixed_fps(RID p_particles, int p_fps) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->fixed_fps = p_fps; +} + +void RasterizerStorageRD::particles_set_fractional_delta(RID p_particles, bool p_enable) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->fractional_delta = p_enable; +} + +void RasterizerStorageRD::particles_set_process_material(RID p_particles, RID p_material) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->process_material = p_material; +} + +void RasterizerStorageRD::particles_set_draw_order(RID p_particles, RS::ParticlesDrawOrder p_order) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->draw_order = p_order; +} + +void RasterizerStorageRD::particles_set_draw_passes(RID p_particles, int p_passes) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->draw_passes.resize(p_passes); +} + +void 
RasterizerStorageRD::particles_set_draw_pass_mesh(RID p_particles, int p_pass, RID p_mesh) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + ERR_FAIL_INDEX(p_pass, particles->draw_passes.size()); + particles->draw_passes.write[p_pass] = p_mesh; +} + +void RasterizerStorageRD::particles_restart(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->restart_request = true; +} + +void RasterizerStorageRD::_particles_allocate_emission_buffer(Particles *particles) { + ERR_FAIL_COND(particles->emission_buffer != nullptr); + + particles->emission_buffer_data.resize(sizeof(ParticleEmissionBuffer::Data) * particles->amount + sizeof(uint32_t) * 4); + zeromem(particles->emission_buffer_data.ptrw(), particles->emission_buffer_data.size()); + particles->emission_buffer = (ParticleEmissionBuffer *)particles->emission_buffer_data.ptrw(); + particles->emission_buffer->particle_max = particles->amount; + + particles->emission_storage_buffer = RD::get_singleton()->storage_buffer_create(particles->emission_buffer_data.size(), particles->emission_buffer_data); + + if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) { + //will need to be re-created + RD::get_singleton()->free(particles->particles_material_uniform_set); + particles->particles_material_uniform_set = RID(); + } +} + +void RasterizerStorageRD::particles_set_subemitter(RID p_particles, RID p_subemitter_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + ERR_FAIL_COND(p_particles == p_subemitter_particles); + + particles->sub_emitter = p_subemitter_particles; + + if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) { + RD::get_singleton()->free(particles->particles_material_uniform_set); + particles->particles_material_uniform_set = RID(); //clear and force to re create sub 
emitting + } +} + +void RasterizerStorageRD::particles_emit(RID p_particles, const Transform &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + ERR_FAIL_COND(particles->amount == 0); + + if (particles->emitting) { + particles->clear = true; + particles->emitting = false; + } + + if (particles->emission_buffer == nullptr) { + _particles_allocate_emission_buffer(particles); + } + + if (particles->inactive) { + //in case it was inactive, make active again + particles->inactive = false; + particles->inactive_time = 0; + } + + int32_t idx = particles->emission_buffer->particle_count; + if (idx < particles->emission_buffer->particle_max) { + store_transform(p_transform, particles->emission_buffer->data[idx].xform); + + particles->emission_buffer->data[idx].velocity[0] = p_velocity.x; + particles->emission_buffer->data[idx].velocity[1] = p_velocity.y; + particles->emission_buffer->data[idx].velocity[2] = p_velocity.z; + + particles->emission_buffer->data[idx].custom[0] = p_custom.r; + particles->emission_buffer->data[idx].custom[1] = p_custom.g; + particles->emission_buffer->data[idx].custom[2] = p_custom.b; + particles->emission_buffer->data[idx].custom[3] = p_custom.a; + + particles->emission_buffer->data[idx].color[0] = p_color.r; + particles->emission_buffer->data[idx].color[1] = p_color.g; + particles->emission_buffer->data[idx].color[2] = p_color.b; + particles->emission_buffer->data[idx].color[3] = p_color.a; + + particles->emission_buffer->data[idx].flags = p_emit_flags; + particles->emission_buffer->particle_count++; + } +} + +void RasterizerStorageRD::particles_request_process(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + if (!particles->dirty) { + particles->dirty = true; + particles->update_list = particle_update_list; + 
particle_update_list = particles; + } +} + +AABB RasterizerStorageRD::particles_get_current_aabb(RID p_particles) { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, AABB()); + + Vector<ParticleData> data; + data.resize(particles->amount); + + Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(particles->particle_buffer); + + Transform inv = particles->emission_transform.affine_inverse(); + + AABB aabb; + if (buffer.size()) { + bool first = true; + const ParticleData *particle_data = (const ParticleData *)data.ptr(); + for (int i = 0; i < particles->amount; i++) { + if (particle_data[i].active) { + Vector3 pos = Vector3(particle_data[i].xform[12], particle_data[i].xform[13], particle_data[i].xform[14]); + if (!particles->use_local_coords) { + pos = inv.xform(pos); + } + if (first) { + aabb.position = pos; + first = false; + } else { + aabb.expand_to(pos); + } + } + } + } + + float longest_axis_size = 0; + for (int i = 0; i < particles->draw_passes.size(); i++) { + if (particles->draw_passes[i].is_valid()) { + AABB maabb = mesh_get_aabb(particles->draw_passes[i], RID()); + longest_axis_size = MAX(maabb.get_longest_axis_size(), longest_axis_size); + } + } + + aabb.grow_by(longest_axis_size); + + return aabb; +} + +AABB RasterizerStorageRD::particles_get_aabb(RID p_particles) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, AABB()); + + return particles->custom_aabb; +} + +void RasterizerStorageRD::particles_set_emission_transform(RID p_particles, const Transform &p_transform) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + particles->emission_transform = p_transform; +} + +int RasterizerStorageRD::particles_get_draw_passes(RID p_particles) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, 0); + + return particles->draw_passes.size(); +} + 
+RID RasterizerStorageRD::particles_get_draw_pass_mesh(RID p_particles, int p_pass) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, RID()); + ERR_FAIL_INDEX_V(p_pass, particles->draw_passes.size(), RID()); + return particles->draw_passes[p_pass]; +} + +void RasterizerStorageRD::_particles_process(Particles *p_particles, float p_delta) { + if (p_particles->particles_material_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(p_particles->particles_material_uniform_set)) { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(p_particles->frame_params_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.ids.push_back(p_particles->particle_buffer); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + if (p_particles->emission_storage_buffer.is_valid()) { + u.ids.push_back(p_particles->emission_storage_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + Particles *sub_emitter = particles_owner.getornull(p_particles->sub_emitter); + if (sub_emitter) { + if (sub_emitter->emission_buffer == nullptr) { //no emission buffer, allocate emission buffer + _particles_allocate_emission_buffer(sub_emitter); + } + u.ids.push_back(sub_emitter->emission_storage_buffer); + } else { + u.ids.push_back(default_rd_storage_buffer); + } + uniforms.push_back(u); + } + + p_particles->particles_material_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.default_shader_rd, 1); + } + + float new_phase = Math::fmod((float)p_particles->phase + (p_delta / p_particles->lifetime) * p_particles->speed_scale, (float)1.0); + + ParticlesFrameParams 
&frame_params = p_particles->frame_params; + + if (p_particles->clear) { + p_particles->cycle_number = 0; + p_particles->random_seed = Math::rand(); + } else if (new_phase < p_particles->phase) { + if (p_particles->one_shot) { + p_particles->emitting = false; + } + p_particles->cycle_number++; + } + + frame_params.emitting = p_particles->emitting; + frame_params.system_phase = new_phase; + frame_params.prev_system_phase = p_particles->phase; + + p_particles->phase = new_phase; + + frame_params.time = RasterizerRD::singleton->get_total_time(); + frame_params.delta = p_delta * p_particles->speed_scale; + frame_params.random_seed = p_particles->random_seed; + frame_params.explosiveness = p_particles->explosiveness; + frame_params.randomness = p_particles->randomness; + + if (p_particles->use_local_coords) { + store_transform(Transform(), frame_params.emission_transform); + } else { + store_transform(p_particles->emission_transform, frame_params.emission_transform); + } + + frame_params.cycle = p_particles->cycle_number; + + ParticlesShader::PushConstant push_constant; + + push_constant.clear = p_particles->clear; + push_constant.total_particles = p_particles->amount; + push_constant.lifetime = p_particles->lifetime; + push_constant.trail_size = 1; + push_constant.use_fractional_delta = p_particles->fractional_delta; + push_constant.sub_emitter_mode = !p_particles->emitting && p_particles->emission_buffer && (p_particles->emission_buffer->particle_count > 0 || p_particles->force_sub_emit); + + p_particles->force_sub_emit = false; //reset + + Particles *sub_emitter = particles_owner.getornull(p_particles->sub_emitter); + + if (sub_emitter && sub_emitter->emission_storage_buffer.is_valid()) { + // print_line("updating subemitter buffer"); + int32_t zero[4] = { 0, sub_emitter->amount, 0, 0 }; + RD::get_singleton()->buffer_update(sub_emitter->emission_storage_buffer, 0, sizeof(uint32_t) * 4, zero, true); + push_constant.can_emit = true; + + if (sub_emitter->emitting) { + 
sub_emitter->emitting = false; + sub_emitter->clear = true; //will need to clear if it was emitting, sorry + } + //make sure the sub emitter processes particles too + sub_emitter->inactive = false; + sub_emitter->inactive_time = 0; + + sub_emitter->force_sub_emit = true; + + } else { + push_constant.can_emit = false; + } + + if (p_particles->emission_buffer && p_particles->emission_buffer->particle_count) { + RD::get_singleton()->buffer_update(p_particles->emission_storage_buffer, 0, sizeof(uint32_t) * 4 + sizeof(ParticleEmissionBuffer::Data) * p_particles->emission_buffer->particle_count, p_particles->emission_buffer, true); + p_particles->emission_buffer->particle_count = 0; + } + + p_particles->clear = false; + + RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams), &frame_params, true); + + ParticlesMaterialData *m = (ParticlesMaterialData *)material_get_data(p_particles->process_material, SHADER_TYPE_PARTICLES); + if (!m) { + m = (ParticlesMaterialData *)material_get_data(particles_shader.default_material, SHADER_TYPE_PARTICLES); + } + + ERR_FAIL_COND(!m); + + //todo should maybe compute all particle systems together? 
+ RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, m->shader_data->pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles_shader.base_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, p_particles->particles_material_uniform_set, 1); + if (m->uniform_set.is_valid()) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, m->uniform_set, 2); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1, 64, 1, 1); + + RD::get_singleton()->compute_list_end(); +} + +void RasterizerStorageRD::particles_set_view_axis(RID p_particles, const Vector3 &p_axis) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND(!particles); + + if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH) { + return; //uninteresting for other modes + } + + //copy to sort buffer + if (particles->particles_sort_buffer == RID()) { + uint32_t size = particles->amount; + if (size & 1) { + size++; //make multiple of 16 + } + size *= sizeof(float) * 2; + particles->particles_sort_buffer = RD::get_singleton()->storage_buffer_create(size); + { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(particles->particles_sort_buffer); + uniforms.push_back(u); + } + + particles->particles_sort_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, ParticlesShader::COPY_MODE_FILL_SORT_BUFFER), 1); + } + } + + Vector3 axis = -p_axis; // cameras look to z negative + + if (particles->use_local_coords) { + axis = 
particles->emission_transform.basis.xform_inv(axis).normalized(); + } + + ParticlesShader::CopyPushConstant copy_push_constant; + copy_push_constant.total_particles = particles->amount; + copy_push_constant.sort_direction[0] = axis.x; + copy_push_constant.sort_direction[1] = axis.y; + copy_push_constant.sort_direction[2] = axis.z; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_SORT_BUFFER]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + + RD::get_singleton()->compute_list_end(); + + effects.sort_buffer(particles->particles_sort_uniform_set, particles->amount); + + compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_INSTANCES_WITH_SORT_BUFFER]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + + RD::get_singleton()->compute_list_end(); +} + +void RasterizerStorageRD::update_particles() { + while (particle_update_list) { + //use transform feedback to process 
particles + + Particles *particles = particle_update_list; + + //take and remove + particle_update_list = particles->update_list; + particles->update_list = nullptr; + particles->dirty = false; + + if (particles->restart_request) { + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + particles->restart_request = false; + } + + if (particles->inactive && !particles->emitting) { + //go next + continue; + } + + if (particles->emitting) { + if (particles->inactive) { + //restart system from scratch + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + } + particles->inactive = false; + particles->inactive_time = 0; + } else { + particles->inactive_time += particles->speed_scale * RasterizerRD::singleton->get_frame_delta_time(); + if (particles->inactive_time > particles->lifetime * 1.2) { + particles->inactive = true; + continue; + } + } + + bool zero_time_scale = Engine::get_singleton()->get_time_scale() <= 0.0; + + if (particles->clear && particles->pre_process_time > 0.0) { + float frame_time; + if (particles->fixed_fps > 0) + frame_time = 1.0 / particles->fixed_fps; + else + frame_time = 1.0 / 30.0; + + float todo = particles->pre_process_time; + + while (todo >= 0) { + _particles_process(particles, frame_time); + todo -= frame_time; + } + } + + if (particles->fixed_fps > 0) { + float frame_time; + float decr; + if (zero_time_scale) { + frame_time = 0.0; + decr = 1.0 / particles->fixed_fps; + } else { + frame_time = 1.0 / particles->fixed_fps; + decr = frame_time; + } + float delta = RasterizerRD::singleton->get_frame_delta_time(); + if (delta > 0.1) { //avoid recursive stalls if fps goes below 10 + delta = 0.1; + } else if (delta <= 0.0) { //unlikely but.. 
+ delta = 0.001; + } + float todo = particles->frame_remainder + delta; + + while (todo >= frame_time) { + _particles_process(particles, frame_time); + todo -= decr; + } + + particles->frame_remainder = todo; + + } else { + if (zero_time_scale) + _particles_process(particles, 0.0); + else + _particles_process(particles, RasterizerRD::singleton->get_frame_delta_time()); + } + + //copy particles to instance buffer + + if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH) { + ParticlesShader::CopyPushConstant copy_push_constant; + copy_push_constant.total_particles = particles->amount; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_INSTANCES]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + + RD::get_singleton()->compute_list_end(); + } + + particles->instance_dependency.instance_notify_changed(true, false); //make sure shadows are updated + } +} + +bool RasterizerStorageRD::particles_is_inactive(RID p_particles) const { + const Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, false); + return !particles->emitting && particles->inactive; +} + +/* PARTICLES SHADER */ + +void RasterizerStorageRD::ParticlesShaderData::set_code(const String &p_code) { + //compile + + code = p_code; + valid = false; + ubo_size = 0; + uniforms.clear(); + + if (code == String()) { + return; //just invalid, but no error + } + + ShaderCompilerRD::GeneratedCode gen_code; + ShaderCompilerRD::IdentifierActions actions; + + /* + uses_time = false; + + 
actions.render_mode_flags["use_half_res_pass"] = &uses_half_res; + actions.render_mode_flags["use_quarter_res_pass"] = &uses_quarter_res; + + actions.usage_flag_pointers["TIME"] = &uses_time; +*/ + + actions.uniforms = &uniforms; + + Error err = base_singleton->particles_shader.compiler.compile(RS::SHADER_PARTICLES, code, &actions, path, gen_code); + + ERR_FAIL_COND(err != OK); + + if (version.is_null()) { + version = base_singleton->particles_shader.shader.version_create(); + } + + base_singleton->particles_shader.shader.version_set_compute_code(version, gen_code.uniforms, gen_code.compute_global, gen_code.compute, gen_code.defines); + ERR_FAIL_COND(!base_singleton->particles_shader.shader.version_is_valid(version)); + + ubo_size = gen_code.uniform_total_size; + ubo_offsets = gen_code.uniform_offsets; + texture_uniforms = gen_code.texture_uniforms; + + //update pipelines + + pipeline = RD::get_singleton()->compute_pipeline_create(base_singleton->particles_shader.shader.version_get_shader(version, 0)); + + valid = true; +} + +void RasterizerStorageRD::ParticlesShaderData::set_default_texture_param(const StringName &p_name, RID p_texture) { + if (!p_texture.is_valid()) { + default_texture_params.erase(p_name); + } else { + default_texture_params[p_name] = p_texture; + } +} + +void RasterizerStorageRD::ParticlesShaderData::get_param_list(List<PropertyInfo> *p_param_list) const { + Map<int, StringName> order; + + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL || E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + if (E->get().texture_order >= 0) { + order[E->get().texture_order + 100000] = E->key(); + } else { + order[E->get().order] = E->key(); + } + } + + for (Map<int, StringName>::Element *E = order.front(); E; E = E->next()) { + PropertyInfo pi = 
ShaderLanguage::uniform_to_property_info(uniforms[E->get()]); + pi.name = E->get(); + p_param_list->push_back(pi); + } +} + +void RasterizerStorageRD::ParticlesShaderData::get_instance_param_list(List<RasterizerStorage::InstanceShaderParam> *p_param_list) const { + for (Map<StringName, ShaderLanguage::ShaderNode::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + RasterizerStorage::InstanceShaderParam p; + p.info = ShaderLanguage::uniform_to_property_info(E->get()); + p.info.name = E->key(); //supply name + p.index = E->get().instance_index; + p.default_value = ShaderLanguage::constant_value_to_variant(E->get().default_value, E->get().type, E->get().hint); + p_param_list->push_back(p); + } +} + +bool RasterizerStorageRD::ParticlesShaderData::is_param_texture(const StringName &p_param) const { + if (!uniforms.has(p_param)) { + return false; + } + + return uniforms[p_param].texture_order >= 0; +} + +bool RasterizerStorageRD::ParticlesShaderData::is_animated() const { + return false; +} + +bool RasterizerStorageRD::ParticlesShaderData::casts_shadows() const { + return false; +} + +Variant RasterizerStorageRD::ParticlesShaderData::get_default_parameter(const StringName &p_parameter) const { + if (uniforms.has(p_parameter)) { + ShaderLanguage::ShaderNode::Uniform uniform = uniforms[p_parameter]; + Vector<ShaderLanguage::ConstantNode::Value> default_value = uniform.default_value; + return ShaderLanguage::constant_value_to_variant(default_value, uniform.type, uniform.hint); + } + return Variant(); +} + +RasterizerStorageRD::ParticlesShaderData::ParticlesShaderData() { + valid = false; +} + +RasterizerStorageRD::ParticlesShaderData::~ParticlesShaderData() { + //pipeline variants will clear themselves if shader is gone + if (version.is_valid()) { + base_singleton->particles_shader.shader.version_free(version); + } +} + +RasterizerStorageRD::ShaderData 
*RasterizerStorageRD::_create_particles_shader_func() { + ParticlesShaderData *shader_data = memnew(ParticlesShaderData); + return shader_data; +} + +void RasterizerStorageRD::ParticlesMaterialData::update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { + uniform_set_updated = true; + + if ((uint32_t)ubo_data.size() != shader_data->ubo_size) { + p_uniform_dirty = true; + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + uniform_buffer = RID(); + } + + ubo_data.resize(shader_data->ubo_size); + if (ubo_data.size()) { + uniform_buffer = RD::get_singleton()->uniform_buffer_create(ubo_data.size()); + memset(ubo_data.ptrw(), 0, ubo_data.size()); //clear + } + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + //check whether buffer changed + if (p_uniform_dirty && ubo_data.size()) { + update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); + RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); + } + + uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); + + if ((uint32_t)texture_cache.size() != tex_uniform_count) { + texture_cache.resize(tex_uniform_count); + p_textures_dirty = true; + + //clear previous uniform set + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + uniform_set = RID(); + } + } + + if (p_textures_dirty && tex_uniform_count) { + update_textures(p_parameters, shader_data->default_texture_params, shader_data->texture_uniforms, texture_cache.ptrw(), true); + } + + if (shader_data->ubo_size == 0 && shader_data->texture_uniforms.size() == 0) { + // This material does not require an uniform set, so don't create it. 
+ return; + } + + if (!p_textures_dirty && uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + //no reason to update uniform set, only UBO (or nothing) was needed to update + return; + } + + Vector<RD::Uniform> uniforms; + + { + if (shader_data->ubo_size) { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 0; + u.ids.push_back(uniform_buffer); + uniforms.push_back(u); + } + + const RID *textures = texture_cache.ptrw(); + for (uint32_t i = 0; i < tex_uniform_count; i++) { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1 + i; + u.ids.push_back(textures[i]); + uniforms.push_back(u); + } + } + + uniform_set = RD::get_singleton()->uniform_set_create(uniforms, base_singleton->particles_shader.shader.version_get_shader(shader_data->version, 0), 2); +} + +RasterizerStorageRD::ParticlesMaterialData::~ParticlesMaterialData() { + if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { + RD::get_singleton()->free(uniform_set); + } + + if (uniform_buffer.is_valid()) { + RD::get_singleton()->free(uniform_buffer); + } +} + +RasterizerStorageRD::MaterialData *RasterizerStorageRD::_create_particles_material_func(ParticlesShaderData *p_shader) { + ParticlesMaterialData *material_data = memnew(ParticlesMaterialData); + material_data->shader_data = p_shader; + material_data->last_frame = false; + //update will happen later anyway so do nothing. 
+ return material_data; +} +//////// /* SKELETON API */ RID RasterizerStorageRD::skeleton_create() { @@ -4683,6 +5794,9 @@ void RasterizerStorageRD::base_update_dependency(RID p_base, RasterizerScene::In } else if (light_owner.owns(p_base)) { Light *l = light_owner.getornull(p_base); p_instance->update_dependency(&l->instance_dependency); + } else if (particles_owner.owns(p_base)) { + Particles *p = particles_owner.getornull(p_base); + p_instance->update_dependency(&p->instance_dependency); } } @@ -4715,6 +5829,9 @@ RS::InstanceType RasterizerStorageRD::get_base_type(RID p_rid) const { if (lightmap_owner.owns(p_rid)) { return RS::INSTANCE_LIGHTMAP; } + if (particles_owner.owns(p_rid)) { + return RS::INSTANCE_PARTICLES; + } return RS::INSTANCE_NONE; } @@ -5618,6 +6735,8 @@ void RasterizerStorageRD::update_dirty_resources() { _update_dirty_multimeshes(); _update_dirty_skeletons(); _update_decal_atlas(); + + update_particles(); } bool RasterizerStorageRD::has_os_feature(const String &p_feature) const { @@ -5747,6 +6866,11 @@ bool RasterizerStorageRD::free(RID p_rid) { light->instance_dependency.instance_notify_deleted(p_rid); light_owner.free(p_rid); + } else if (particles_owner.owns(p_rid)) { + Particles *particles = particles_owner.getornull(p_rid); + _particles_free_data(particles); + particles->instance_dependency.instance_notify_deleted(p_rid); + particles_owner.free(p_rid); } else if (render_target_owner.owns(p_rid)) { RenderTarget *rt = render_target_owner.getornull(p_rid); @@ -6066,15 +7190,18 @@ RasterizerStorageRD::RasterizerStorageRD() { case RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED: { sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + sampler_state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; } break; case RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED: { sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_REPEAT; sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_REPEAT; + 
sampler_state.repeat_w = RD::SAMPLER_REPEAT_MODE_REPEAT; } break; case RS::CANVAS_ITEM_TEXTURE_REPEAT_MIRROR: { sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler_state.repeat_w = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; } break; default: { } @@ -6211,6 +7338,125 @@ RasterizerStorageRD::RasterizerStorageRD() { } lightmap_probe_capture_update_speed = GLOBAL_GET("rendering/lightmapper/probe_capture_update_speed"); + + /* Particles */ + + { + // Initialize particles + Vector<String> particles_modes; + particles_modes.push_back(""); + particles_shader.shader.initialize(particles_modes, String()); + } + shader_set_data_request_function(RasterizerStorageRD::SHADER_TYPE_PARTICLES, _create_particles_shader_funcs); + material_set_data_request_function(RasterizerStorageRD::SHADER_TYPE_PARTICLES, _create_particles_material_funcs); + + { + ShaderCompilerRD::DefaultIdentifierActions actions; + + actions.renames["COLOR"] = "PARTICLE.color"; + actions.renames["VELOCITY"] = "PARTICLE.velocity"; + //actions.renames["MASS"] = "mass"; ? 
+ actions.renames["ACTIVE"] = "PARTICLE.is_active"; + actions.renames["RESTART"] = "restart"; + actions.renames["CUSTOM"] = "PARTICLE.custom"; + actions.renames["TRANSFORM"] = "PARTICLE.xform"; + actions.renames["TIME"] = "FRAME.time"; + actions.renames["LIFETIME"] = "params.lifetime"; + actions.renames["DELTA"] = "local_delta"; + actions.renames["NUMBER"] = "particle"; + actions.renames["INDEX"] = "index"; + //actions.renames["GRAVITY"] = "current_gravity"; + actions.renames["EMISSION_TRANSFORM"] = "FRAME.emission_transform"; + actions.renames["RANDOM_SEED"] = "FRAME.random_seed"; + actions.renames["FLAG_EMIT_POSITION"] = "EMISSION_FLAG_HAS_POSITION"; + actions.renames["FLAG_EMIT_ROT_SCALE"] = "EMISSION_FLAG_HAS_ROTATION_SCALE"; + actions.renames["FLAG_EMIT_VELOCITY"] = "EMISSION_FLAG_HAS_VELOCITY"; + actions.renames["FLAG_EMIT_COLOR"] = "EMISSION_FLAG_HAS_COLOR"; + actions.renames["FLAG_EMIT_CUSTOM"] = "EMISSION_FLAG_HAS_CUSTOM"; + actions.renames["RESTART_POSITION"] = "restart_position"; + actions.renames["RESTART_ROT_SCALE"] = "restart_rotation_scale"; + actions.renames["RESTART_VELOCITY"] = "restart_velocity"; + actions.renames["RESTART_COLOR"] = "restart_color"; + actions.renames["RESTART_CUSTOM"] = "restart_custom"; + actions.renames["emit_particle"] = "emit_particle"; + + actions.render_mode_defines["disable_force"] = "#define DISABLE_FORCE\n"; + actions.render_mode_defines["disable_velocity"] = "#define DISABLE_VELOCITY\n"; + actions.render_mode_defines["keep_data"] = "#define ENABLE_KEEP_DATA\n"; + + actions.sampler_array_name = "material_samplers"; + actions.base_texture_binding_index = 1; + actions.texture_layout_set = 2; + actions.base_uniform_string = "material."; + actions.base_varying_index = 10; + + actions.default_filter = ShaderLanguage::FILTER_LINEAR_MIPMAP; + actions.default_repeat = ShaderLanguage::REPEAT_ENABLE; + actions.global_buffer_array_variable = "global_variables.data"; + + particles_shader.compiler.initialize(actions); + } + + { + // 
default material and shader for particles shader + particles_shader.default_shader = shader_create(); + shader_set_code(particles_shader.default_shader, "shader_type particles; void compute() { COLOR = vec4(1.0); } \n"); + particles_shader.default_material = material_create(); + material_set_shader(particles_shader.default_material, particles_shader.default_shader); + + ParticlesMaterialData *md = (ParticlesMaterialData *)material_get_data(particles_shader.default_material, RasterizerStorageRD::SHADER_TYPE_PARTICLES); + particles_shader.default_shader_rd = particles_shader.shader.version_get_shader(md->shader_data->version, 0); + + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.ids.resize(12); + RID *ids_ptr = u.ids.ptrw(); + ids_ptr[0] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[1] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[2] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[3] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[4] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[5] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + ids_ptr[6] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[7] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[8] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[9] = 
sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[10] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + ids_ptr[11] = sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS_ANISOTROPIC, RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.ids.push_back(global_variables_get_storage_buffer()); + uniforms.push_back(u); + } + + particles_shader.base_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.default_shader_rd, 0); + } + + default_rd_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4); + + { + Vector<String> copy_modes; + copy_modes.push_back("\n#define MODE_FILL_INSTANCES\n"); + copy_modes.push_back("\n#define MODE_FILL_SORT_BUFFER\n#define USE_SORT_BUFFER\n"); + copy_modes.push_back("\n#define MODE_FILL_INSTANCES\n#define USE_SORT_BUFFER\n"); + + particles_shader.copy_shader.initialize(copy_modes); + + particles_shader.copy_shader_version = particles_shader.copy_shader.version_create(); + + for (int i = 0; i < ParticlesShader::COPY_MODE_MAX; i++) { + particles_shader.copy_pipelines[i] = RD::get_singleton()->compute_pipeline_create(particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, i)); + } + } } RasterizerStorageRD::~RasterizerStorageRD() { @@ -6237,6 +7483,8 @@ RasterizerStorageRD::~RasterizerStorageRD() { } giprobe_sdf_shader.version_free(giprobe_sdf_shader_version); + RD::get_singleton()->free(default_rd_storage_buffer); + if (decal_atlas.textures.size()) { ERR_PRINT("Decal Atlas: " + itos(decal_atlas.textures.size()) + " textures were not removed from the atlas."); } diff --git a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h 
b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h index 6e5923953b..e14b9528cf 100644 --- a/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h +++ b/servers/rendering/rasterizer_rd/rasterizer_storage_rd.h @@ -36,6 +36,8 @@ #include "servers/rendering/rasterizer_rd/rasterizer_effects_rd.h" #include "servers/rendering/rasterizer_rd/shader_compiler_rd.h" #include "servers/rendering/rasterizer_rd/shaders/giprobe_sdf.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/particles.glsl.gen.h" +#include "servers/rendering/rasterizer_rd/shaders/particles_copy.glsl.gen.h" #include "servers/rendering/rendering_device.h" class RasterizerStorageRD : public RasterizerStorage { @@ -203,6 +205,14 @@ private: int height_2d; int width_2d; + struct BufferSlice3D { + Size2i size; + uint32_t offset = 0; + uint32_t buffer_size = 0; + }; + Vector<BufferSlice3D> buffer_slices_3d; + uint32_t buffer_size_3d = 0; + bool is_render_target; bool is_proxy; @@ -247,6 +257,7 @@ private: RID default_rd_textures[DEFAULT_RD_TEXTURE_MAX]; RID default_rd_samplers[RS::CANVAS_ITEM_TEXTURE_FILTER_MAX][RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX]; + RID default_rd_storage_buffer; /* DECAL ATLAS */ @@ -386,6 +397,9 @@ private: uint32_t multimesh_render_index = 0; uint64_t multimesh_render_pass = 0; + + uint32_t particles_render_index = 0; + uint64_t particles_render_pass = 0; }; uint32_t blend_shape_count = 0; @@ -448,6 +462,248 @@ private: _FORCE_INLINE_ void _multimesh_re_create_aabb(MultiMesh *multimesh, const float *p_data, int p_instances); void _update_dirty_multimeshes(); + /* PARTICLES */ + + struct ParticleData { + float xform[16]; + float velocity[3]; + uint32_t active; + float color[4]; + float custom[3]; + float lifetime; + uint32_t pad[3]; + }; + + struct ParticlesFrameParams { + uint32_t emitting; + float system_phase; + float prev_system_phase; + uint32_t cycle; + + float explosiveness; + float randomness; + float time; + float delta; + + uint32_t random_seed; + uint32_t pad[3]; 
+ + float emission_transform[16]; + }; + + struct ParticleEmissionBufferData { + }; + + struct ParticleEmissionBuffer { + struct Data { + float xform[16]; + float velocity[3]; + uint32_t flags; + float color[4]; + float custom[4]; + }; + + int32_t particle_count; + int32_t particle_max; + uint32_t pad1; + uint32_t pad2; + Data data[1]; //stands in for an unsized array (it is 2020 and empty arrays are still non-standard in C++) + }; + + struct Particles { + bool inactive; + float inactive_time; + bool emitting; + bool one_shot; + int amount; + float lifetime; + float pre_process_time; + float explosiveness; + float randomness; + bool restart_request; + AABB custom_aabb; + bool use_local_coords; + RID process_material; + + RS::ParticlesDrawOrder draw_order; + + Vector<RID> draw_passes; + + RID particle_buffer; + RID particle_instance_buffer; + RID frame_params_buffer; + + RID particles_material_uniform_set; + RID particles_copy_uniform_set; + RID particles_transforms_buffer_uniform_set; + + RID particles_sort_buffer; + RID particles_sort_uniform_set; + + bool dirty = false; + Particles *update_list = nullptr; + + RID sub_emitter; + + float phase; + float prev_phase; /* NOTE(review): phase and prev_phase have no default here and are absent from the constructor initializer list below - confirm they are assigned before first read */ + uint64_t prev_ticks; + uint32_t random_seed; + + uint32_t cycle_number; + + float speed_scale; + + int fixed_fps; + bool fractional_delta; + float frame_remainder; + + bool clear; + + bool force_sub_emit = false; + + Transform emission_transform; + + Vector<uint8_t> emission_buffer_data; + + ParticleEmissionBuffer *emission_buffer = nullptr; + RID emission_storage_buffer; + + Particles() : + inactive(true), + inactive_time(0.0), + emitting(false), + one_shot(false), + amount(0), + lifetime(1.0), + pre_process_time(0.0), + explosiveness(0.0), + randomness(0.0), + restart_request(false), + custom_aabb(AABB(Vector3(-4, -4, -4), Vector3(8, 8, 8))), + use_local_coords(true), + draw_order(RS::PARTICLES_DRAW_ORDER_INDEX), + prev_ticks(0), + random_seed(0), + cycle_number(0), + speed_scale(1.0), + fixed_fps(0), + fractional_delta(false), + 
frame_remainder(0), + clear(true) { + } + + RasterizerScene::InstanceDependency instance_dependency; + + ParticlesFrameParams frame_params; + }; + + void _particles_process(Particles *p_particles, float p_delta); + void _particles_allocate_emission_buffer(Particles *particles); + void _particles_free_data(Particles *particles); + + struct ParticlesShader { + struct PushConstant { + float lifetime; + uint32_t clear; + uint32_t total_particles; + uint32_t trail_size; + + uint32_t use_fractional_delta; + uint32_t sub_emitter_mode; + uint32_t can_emit; + uint32_t pad; + }; + + ParticlesShaderRD shader; + ShaderCompilerRD compiler; + + RID default_shader; + RID default_material; + RID default_shader_rd; + + RID base_uniform_set; + + struct CopyPushConstant { + float sort_direction[3]; + uint32_t total_particles; + }; + + enum { + COPY_MODE_FILL_INSTANCES, + COPY_MODE_FILL_SORT_BUFFER, + COPY_MODE_FILL_INSTANCES_WITH_SORT_BUFFER, + COPY_MODE_MAX, + }; + + ParticlesCopyShaderRD copy_shader; + RID copy_shader_version; + RID copy_pipelines[COPY_MODE_MAX]; + + } particles_shader; + + Particles *particle_update_list = nullptr; + + struct ParticlesShaderData : public ShaderData { + bool valid; + RID version; + + //RenderPipelineVertexFormatCacheRD pipelines[SKY_VERSION_MAX]; + Map<StringName, ShaderLanguage::ShaderNode::Uniform> uniforms; + Vector<ShaderCompilerRD::GeneratedCode::Texture> texture_uniforms; + + Vector<uint32_t> ubo_offsets; + uint32_t ubo_size; + + String path; + String code; + Map<StringName, RID> default_texture_params; + + RID pipeline; + + bool uses_time; + + virtual void set_code(const String &p_Code); + virtual void set_default_texture_param(const StringName &p_name, RID p_texture); + virtual void get_param_list(List<PropertyInfo> *p_param_list) const; + virtual void get_instance_param_list(List<RasterizerStorage::InstanceShaderParam> *p_param_list) const; + virtual bool is_param_texture(const StringName &p_param) const; + virtual bool is_animated() 
const; + virtual bool casts_shadows() const; + virtual Variant get_default_parameter(const StringName &p_parameter) const; + ParticlesShaderData(); + virtual ~ParticlesShaderData(); + }; + + ShaderData *_create_particles_shader_func(); + static RasterizerStorageRD::ShaderData *_create_particles_shader_funcs() { + return base_singleton->_create_particles_shader_func(); + } + + struct ParticlesMaterialData : public MaterialData { + uint64_t last_frame; + ParticlesShaderData *shader_data; + RID uniform_buffer; + RID uniform_set; + Vector<RID> texture_cache; + Vector<uint8_t> ubo_data; + bool uniform_set_updated; + + virtual void set_render_priority(int p_priority) {} + virtual void set_next_pass(RID p_pass) {} + virtual void update_parameters(const Map<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty); + virtual ~ParticlesMaterialData(); + }; + + MaterialData *_create_particles_material_func(ParticlesShaderData *p_shader); + static RasterizerStorageRD::MaterialData *_create_particles_material_funcs(ShaderData *p_shader) { + return base_singleton->_create_particles_material_func(static_cast<ParticlesShaderData *>(p_shader)); + } + + void update_particles(); + + mutable RID_Owner<Particles> particles_owner; + /* Skeleton */ struct Skeleton { @@ -732,14 +988,14 @@ public: virtual RID texture_2d_create(const Ref<Image> &p_image); virtual RID texture_2d_layered_create(const Vector<Ref<Image>> &p_layers, RS::TextureLayeredType p_layered_type); - virtual RID texture_3d_create(const Vector<Ref<Image>> &p_slices); //all slices, then all the mipmaps, must be coherent + virtual RID texture_3d_create(Image::Format p_format, int p_width, int p_height, int p_depth, bool p_mipmaps, const Vector<Ref<Image>> &p_data); //all slices, then all the mipmaps, must be coherent virtual RID texture_proxy_create(RID p_base); virtual void _texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer, bool p_immediate); virtual void 
texture_2d_update_immediate(RID p_texture, const Ref<Image> &p_image, int p_layer = 0); //mostly used for video and streaming virtual void texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer = 0); - virtual void texture_3d_update(RID p_texture, const Ref<Image> &p_image, int p_depth, int p_mipmap); + virtual void texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data); virtual void texture_proxy_update(RID p_texture, RID p_proxy_to); //these two APIs can be used together or in combination with the others. @@ -749,7 +1005,7 @@ public: virtual Ref<Image> texture_2d_get(RID p_texture) const; virtual Ref<Image> texture_2d_layer_get(RID p_texture, int p_layer) const; - virtual Ref<Image> texture_3d_slice_get(RID p_texture, int p_depth, int p_mipmap) const; + virtual Vector<Ref<Image>> texture_3d_get(RID p_texture) const; virtual void texture_replace(RID p_texture, RID p_by_texture); virtual void texture_set_size_override(RID p_texture, int p_width, int p_height); @@ -977,6 +1233,19 @@ public: return s->multimesh_render_index; } + _FORCE_INLINE_ uint32_t mesh_surface_get_particles_render_pass_index(RID p_mesh, uint32_t p_surface_index, uint64_t p_render_pass, uint32_t *r_index) { + Mesh *mesh = mesh_owner.getornull(p_mesh); + Mesh::Surface *s = mesh->surfaces[p_surface_index]; + + if (s->particles_render_pass != p_render_pass) { + (*r_index)++; + s->particles_render_pass = p_render_pass; + s->particles_render_index = *r_index; + } + + return s->particles_render_index; + } + /* MULTIMESH API */ RID multimesh_create(); @@ -1407,39 +1676,77 @@ public: /* PARTICLES */ - RID particles_create() { return RID(); } + RID particles_create(); - void particles_set_emitting(RID p_particles, bool p_emitting) {} - void particles_set_amount(RID p_particles, int p_amount) {} - void particles_set_lifetime(RID p_particles, float p_lifetime) {} - void particles_set_one_shot(RID p_particles, bool p_one_shot) {} - void particles_set_pre_process_time(RID 
p_particles, float p_time) {} - void particles_set_explosiveness_ratio(RID p_particles, float p_ratio) {} - void particles_set_randomness_ratio(RID p_particles, float p_ratio) {} - void particles_set_custom_aabb(RID p_particles, const AABB &p_aabb) {} - void particles_set_speed_scale(RID p_particles, float p_scale) {} - void particles_set_use_local_coordinates(RID p_particles, bool p_enable) {} - void particles_set_process_material(RID p_particles, RID p_material) {} - void particles_set_fixed_fps(RID p_particles, int p_fps) {} - void particles_set_fractional_delta(RID p_particles, bool p_enable) {} - void particles_restart(RID p_particles) {} + void particles_set_emitting(RID p_particles, bool p_emitting); + void particles_set_amount(RID p_particles, int p_amount); + void particles_set_lifetime(RID p_particles, float p_lifetime); + void particles_set_one_shot(RID p_particles, bool p_one_shot); + void particles_set_pre_process_time(RID p_particles, float p_time); + void particles_set_explosiveness_ratio(RID p_particles, float p_ratio); + void particles_set_randomness_ratio(RID p_particles, float p_ratio); + void particles_set_custom_aabb(RID p_particles, const AABB &p_aabb); + void particles_set_speed_scale(RID p_particles, float p_scale); + void particles_set_use_local_coordinates(RID p_particles, bool p_enable); + void particles_set_process_material(RID p_particles, RID p_material); + void particles_set_fixed_fps(RID p_particles, int p_fps); + void particles_set_fractional_delta(RID p_particles, bool p_enable); + void particles_restart(RID p_particles); + void particles_emit(RID p_particles, const Transform &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags); + void particles_set_subemitter(RID p_particles, RID p_subemitter_particles); - void particles_set_draw_order(RID p_particles, RS::ParticlesDrawOrder p_order) {} + void particles_set_draw_order(RID p_particles, RS::ParticlesDrawOrder p_order); - void 
particles_set_draw_passes(RID p_particles, int p_count) {} - void particles_set_draw_pass_mesh(RID p_particles, int p_pass, RID p_mesh) {} + void particles_set_draw_passes(RID p_particles, int p_count); + void particles_set_draw_pass_mesh(RID p_particles, int p_pass, RID p_mesh); - void particles_request_process(RID p_particles) {} - AABB particles_get_current_aabb(RID p_particles) { return AABB(); } - AABB particles_get_aabb(RID p_particles) const { return AABB(); } + void particles_request_process(RID p_particles); + AABB particles_get_current_aabb(RID p_particles); + AABB particles_get_aabb(RID p_particles) const; - void particles_set_emission_transform(RID p_particles, const Transform &p_transform) {} + void particles_set_emission_transform(RID p_particles, const Transform &p_transform); - bool particles_get_emitting(RID p_particles) { return false; } - int particles_get_draw_passes(RID p_particles) const { return 0; } - RID particles_get_draw_pass_mesh(RID p_particles, int p_pass) const { return RID(); } + bool particles_get_emitting(RID p_particles); + int particles_get_draw_passes(RID p_particles) const; + RID particles_get_draw_pass_mesh(RID p_particles, int p_pass) const; + + void particles_set_view_axis(RID p_particles, const Vector3 &p_axis); + + virtual bool particles_is_inactive(RID p_particles) const; + + _FORCE_INLINE_ uint32_t particles_get_amount(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, 0); + + return particles->amount; + } - virtual bool particles_is_inactive(RID p_particles) const { return false; } + _FORCE_INLINE_ uint32_t particles_is_using_local_coords(RID p_particles) { + Particles *particles = particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, false); + + return particles->use_local_coords; + } + + _FORCE_INLINE_ RID particles_get_instance_buffer_uniform_set(RID p_particles, RID p_shader, uint32_t p_set) { + Particles *particles = 
particles_owner.getornull(p_particles); + ERR_FAIL_COND_V(!particles, RID()); + if (particles->particles_transforms_buffer_uniform_set.is_null()) { + Vector<RD::Uniform> uniforms; + + { + RD::Uniform u; + u.type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.ids.push_back(particles->particle_instance_buffer); + uniforms.push_back(u); + } + + particles->particles_transforms_buffer_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, p_shader, p_set); + } + + return particles->particles_transforms_buffer_uniform_set; + } /* GLOBAL VARIABLES API */ diff --git a/servers/rendering/rasterizer_rd/shader_compiler_rd.cpp b/servers/rendering/rasterizer_rd/shader_compiler_rd.cpp index 1820c39c5a..f70ddbb75a 100644 --- a/servers/rendering/rasterizer_rd/shader_compiler_rd.cpp +++ b/servers/rendering/rasterizer_rd/shader_compiler_rd.cpp @@ -537,6 +537,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge r_gen_code.vertex_global += struct_code; r_gen_code.fragment_global += struct_code; + r_gen_code.compute_global += struct_code; } int max_texture_uniforms = 0; @@ -591,6 +592,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge if (SL::is_sampler_type(E->get().type)) { r_gen_code.vertex_global += ucode; r_gen_code.fragment_global += ucode; + r_gen_code.compute_global += ucode; GeneratedCode::Texture texture; texture.name = E->key(); @@ -700,6 +702,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge vcode += ";\n"; r_gen_code.vertex_global += "layout(location=" + itos(index) + ") " + interp_mode + "out " + vcode; r_gen_code.fragment_global += "layout(location=" + itos(index) + ") " + interp_mode + "in " + vcode; + r_gen_code.compute_global += "layout(location=" + itos(index) + ") " + interp_mode + "out " + vcode; /* NOTE(review): this appends a stage output declaration to the compute globals, but GLSL compute shaders do not support user-defined in/out variables - confirm this line can never run for a compute shader */ index++; } @@ -724,6 +727,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge gcode += ";\n"; r_gen_code.vertex_global 
+= gcode; r_gen_code.fragment_global += gcode; + r_gen_code.compute_global += gcode; } Map<StringName, String> function_code; @@ -741,6 +745,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge Set<StringName> added_vtx; Set<StringName> added_fragment; //share for light + Set<StringName> added_compute; //passed to _dump_function_deps for the compute function only for (int i = 0; i < pnode->functions.size(); i++) { SL::FunctionNode *fnode = pnode->functions[i].function; @@ -763,6 +768,12 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge _dump_function_deps(pnode, fnode->name, function_code, r_gen_code.fragment_global, added_fragment); r_gen_code.light = function_code[light_name]; } + + if (fnode->name == compute_name) { + _dump_function_deps(pnode, fnode->name, function_code, r_gen_code.compute_global, added_compute); + r_gen_code.compute = function_code[compute_name]; + } + function = nullptr; } @@ -1245,6 +1256,8 @@ Error ShaderCompilerRD::compile(RS::ShaderMode p_mode, const String &p_code, Ide r_gen_code.vertex_global = String(); r_gen_code.fragment = String(); r_gen_code.fragment_global = String(); + r_gen_code.compute = String(); + r_gen_code.compute_global = String(); r_gen_code.light = String(); r_gen_code.uses_fragment_time = false; r_gen_code.uses_vertex_time = false; @@ -1266,6 +1279,7 @@ void ShaderCompilerRD::initialize(DefaultIdentifierActions p_actions) { vertex_name = "vertex"; fragment_name = "fragment"; + compute_name = "compute"; light_name = "light"; time_name = "TIME"; diff --git a/servers/rendering/rasterizer_rd/shader_compiler_rd.h b/servers/rendering/rasterizer_rd/shader_compiler_rd.h index ce94fb743f..565520ec65 100644 --- a/servers/rendering/rasterizer_rd/shader_compiler_rd.h +++ b/servers/rendering/rasterizer_rd/shader_compiler_rd.h @@ -68,6 +68,8 @@ public: String fragment_global; String fragment; String light; + String compute_global; + String compute; bool uses_global_textures; bool uses_fragment_time; @@ -104,6 
+106,7 @@ private: StringName vertex_name; StringName fragment_name; StringName light_name; + StringName compute_name; StringName time_name; Set<StringName> texture_functions; diff --git a/servers/rendering/rasterizer_rd/shaders/SCsub b/servers/rendering/rasterizer_rd/shaders/SCsub index 3aa863be98..9d531d63ad 100644 --- a/servers/rendering/rasterizer_rd/shaders/SCsub +++ b/servers/rendering/rasterizer_rd/shaders/SCsub @@ -37,3 +37,6 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("sdfgi_debug_probes.glsl") env.RD_GLSL("volumetric_fog.glsl") env.RD_GLSL("shadow_reduce.glsl") + env.RD_GLSL("particles.glsl") + env.RD_GLSL("particles_copy.glsl") + env.RD_GLSL("sort.glsl") diff --git a/servers/rendering/rasterizer_rd/shaders/copy.glsl b/servers/rendering/rasterizer_rd/shaders/copy.glsl index eb39c28fa9..e565bd8e3d 100644 --- a/servers/rendering/rasterizer_rd/shaders/copy.glsl +++ b/servers/rendering/rasterizer_rd/shaders/copy.glsl @@ -14,6 +14,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; #define FLAG_FLIP_Y (1 << 5) #define FLAG_FORCE_LUMINANCE (1 << 6) #define FLAG_COPY_ALL_SOURCE (1 << 7) +#define FLAG_HIGH_QUALITY_GLOW (1 << 8) layout(push_constant, binding = 1, std430) uniform Params { ivec4 section; @@ -116,17 +117,42 @@ void main() { vec4 color = vec4(0.0); if (bool(params.flags & FLAG_HORIZONTAL)) { - ivec2 base_pos = (pos + params.section.xy) << 1; + ivec2 base_pos = ((pos + params.section.xy) << 1) + ivec2(1); ivec2 section_begin = params.section.xy << 1; ivec2 section_end = section_begin + (params.section.zw << 1); - GLOW_ADD(ivec2(0, 0), 0.174938); - GLOW_ADD(ivec2(1, 0), 0.165569); - GLOW_ADD(ivec2(2, 0), 0.140367); - GLOW_ADD(ivec2(3, 0), 0.106595); - GLOW_ADD(ivec2(-1, 0), 0.165569); - GLOW_ADD(ivec2(-2, 0), 0.140367); - GLOW_ADD(ivec2(-3, 0), 0.106595); + if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) { + //Sample from two lines to capture single pixel features + GLOW_ADD(ivec2(0, 0), 0.152781); + GLOW_ADD(ivec2(1, 0), 
0.144599); + GLOW_ADD(ivec2(2, 0), 0.122589); + GLOW_ADD(ivec2(3, 0), 0.093095); + GLOW_ADD(ivec2(4, 0), 0.063327); + GLOW_ADD(ivec2(-1, 0), 0.144599); + GLOW_ADD(ivec2(-2, 0), 0.122589); + GLOW_ADD(ivec2(-3, 0), 0.093095); + GLOW_ADD(ivec2(-4, 0), 0.063327); + + GLOW_ADD(ivec2(0, 1), 0.152781); + GLOW_ADD(ivec2(1, 1), 0.144599); + GLOW_ADD(ivec2(2, 1), 0.122589); + GLOW_ADD(ivec2(3, 1), 0.093095); + GLOW_ADD(ivec2(4, 1), 0.063327); + GLOW_ADD(ivec2(-1, 1), 0.144599); + GLOW_ADD(ivec2(-2, 1), 0.122589); + GLOW_ADD(ivec2(-3, 1), 0.093095); + GLOW_ADD(ivec2(-4, 1), 0.063327); + color *= 0.5; + } else { + GLOW_ADD(ivec2(0, 0), 0.174938); + GLOW_ADD(ivec2(1, 0), 0.165569); + GLOW_ADD(ivec2(2, 0), 0.140367); + GLOW_ADD(ivec2(3, 0), 0.106595); + GLOW_ADD(ivec2(-1, 0), 0.165569); + GLOW_ADD(ivec2(-2, 0), 0.140367); + GLOW_ADD(ivec2(-3, 0), 0.106595); + } + color *= params.glow_strength; } else { ivec2 base_pos = pos + params.section.xy; diff --git a/servers/rendering/rasterizer_rd/shaders/particles.glsl b/servers/rendering/rasterizer_rd/shaders/particles.glsl new file mode 100644 index 0000000000..3de807b57c --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/particles.glsl @@ -0,0 +1,394 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#define SAMPLER_NEAREST_CLAMP 0 +#define SAMPLER_LINEAR_CLAMP 1 +#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2 +#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5 +#define SAMPLER_NEAREST_REPEAT 6 +#define SAMPLER_LINEAR_REPEAT 7 +#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8 +#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 + +/* SET 0: GLOBAL DATA */ + +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; + +layout(set = 0, 
binding = 2, std430) restrict readonly buffer GlobalVariableData { + vec4 data[]; +} +global_variables; + +/* SET 1: FRAME AND PARTICLE DATA */ + +// a frame history is kept so that trails behave deterministically +struct FrameParams { + bool emitting; + float system_phase; + float prev_system_phase; + uint cycle; + + float explosiveness; + float randomness; + float time; + float delta; + + uint random_seed; + uint pad[3]; + + mat4 emission_transform; +}; + +layout(set = 1, binding = 0, std430) restrict buffer FrameHistory { + FrameParams data[]; +} +frame_history; + +/* NOTE(review): the C++ ParticleData struct in rasterizer_storage_rd.h declares custom[3], lifetime and pad[3] after color, while this struct ends with a single vec4 custom - confirm which layout is used to size the particle buffer */ +struct ParticleData { + mat4 xform; + vec3 velocity; + bool is_active; + vec4 color; + vec4 custom; +}; + +layout(set = 1, binding = 1, std430) restrict buffer Particles { + ParticleData data[]; +} +particles; + +#define EMISSION_FLAG_HAS_POSITION 1 +#define EMISSION_FLAG_HAS_ROTATION_SCALE 2 +#define EMISSION_FLAG_HAS_VELOCITY 4 +#define EMISSION_FLAG_HAS_COLOR 8 +#define EMISSION_FLAG_HAS_CUSTOM 16 + +struct ParticleEmission { + mat4 xform; + vec3 velocity; + uint flags; + vec4 color; + vec4 custom; +}; + +layout(set = 1, binding = 2, std430) restrict volatile coherent buffer SourceEmission { + int particle_count; + uint pad0; + uint pad1; + uint pad2; + ParticleEmission data[]; +} +src_particles; + +layout(set = 1, binding = 3, std430) restrict volatile coherent buffer DestEmission { + int particle_count; + int particle_max; + uint pad1; + uint pad2; + ParticleEmission data[]; +} +dst_particles; + +/* SET 2: MATERIAL */ + +#ifdef USE_MATERIAL_UNIFORMS +layout(set = 2, binding = 0, std140) uniform MaterialUniforms{ + /* clang-format off */ +MATERIAL_UNIFORMS + /* clang-format on */ +} material; +#endif + +layout(push_constant, binding = 0, std430) uniform Params { + float lifetime; + bool clear; + uint total_particles; + uint trail_size; + bool use_fractional_delta; + bool sub_emitter_mode; + bool can_emit; + uint pad; +} +params; + +uint hash(uint x) { + x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = 
((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = (x >> uint(16)) ^ x; + return x; +} + +bool emit_particle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, uint p_flags) { + if (!params.can_emit) { + return false; + } + + bool valid = false; + + int dst_index = atomicAdd(dst_particles.particle_count, 1); + + if (dst_index >= dst_particles.particle_max) { + atomicAdd(dst_particles.particle_count, -1); + return false; + } + /* + valid = true; + + int attempts = 256; // never trust compute + while(attempts-- > 0) { + dst_index = dst_particles.particle_count; + if (dst_index == dst_particles.particle_max) { + return false; //cant emit anymore + } + + if (atomicCompSwap(dst_particles.particle_count, dst_index, dst_index +1 ) != dst_index) { + continue; + } + valid=true; + break; + } + + barrier(); + + if (!valid) { + return false; //gave up (attempts exhausted) + } +*/ + dst_particles.data[dst_index].xform = p_xform; + dst_particles.data[dst_index].velocity = p_velocity; + dst_particles.data[dst_index].color = p_color; + dst_particles.data[dst_index].custom = p_custom; + dst_particles.data[dst_index].flags = p_flags; + + return true; +} + +/* clang-format off */ + +COMPUTE_SHADER_GLOBALS + +/* clang-format on */ + +void main() { + uint particle = gl_GlobalInvocationID.x; + + if (particle >= params.total_particles * params.trail_size) { + return; //discard + } + + uint index = particle / params.trail_size; + uint frame = (particle % params.trail_size); + +#define FRAME frame_history.data[frame] +#define PARTICLE particles.data[particle] + + bool apply_forces = true; + bool apply_velocity = true; + float local_delta = FRAME.delta; + + float mass = 1.0; + + bool restart = false; + + bool restart_position = false; + bool restart_rotation_scale = false; + bool restart_velocity = false; + bool restart_color = false; + bool restart_custom = false; + + if (params.clear) { + PARTICLE.color = vec4(1.0); + PARTICLE.custom = vec4(0.0); + PARTICLE.velocity = vec3(0.0); + 
PARTICLE.is_active = false; + PARTICLE.xform = mat4( + vec4(1.0, 0.0, 0.0, 0.0), + vec4(0.0, 1.0, 0.0, 0.0), + vec4(0.0, 0.0, 1.0, 0.0), + vec4(0.0, 0.0, 0.0, 1.0)); + } + + if (params.sub_emitter_mode) { + if (!PARTICLE.is_active) { + int src_index = atomicAdd(src_particles.particle_count, -1) - 1; + + if (src_index >= 0) { + PARTICLE.is_active = true; + restart = true; + + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_POSITION)) { + PARTICLE.xform[3] = src_particles.data[src_index].xform[3]; + } else { + PARTICLE.xform[3] = vec4(0, 0, 0, 1); + restart_position = true; + } + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_ROTATION_SCALE)) { + PARTICLE.xform[0] = src_particles.data[src_index].xform[0]; + PARTICLE.xform[1] = src_particles.data[src_index].xform[1]; + PARTICLE.xform[2] = src_particles.data[src_index].xform[2]; + } else { + PARTICLE.xform[0] = vec4(1, 0, 0, 0); + PARTICLE.xform[1] = vec4(0, 1, 0, 0); + PARTICLE.xform[2] = vec4(0, 0, 1, 0); + restart_rotation_scale = true; + } + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_VELOCITY)) { + PARTICLE.velocity = src_particles.data[src_index].velocity; + } else { + PARTICLE.velocity = vec3(0); + restart_velocity = true; + } + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_COLOR)) { + PARTICLE.color = src_particles.data[src_index].color; + } else { + PARTICLE.color = vec4(1); + restart_color = true; + } + + if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_CUSTOM)) { + PARTICLE.custom = src_particles.data[src_index].custom; + } else { + PARTICLE.custom = vec4(0); + restart_custom = true; + } + } + } + + } else if (FRAME.emitting) { + float restart_phase = float(index) / float(params.total_particles); + + if (FRAME.randomness > 0.0) { + uint seed = FRAME.cycle; + if (restart_phase >= FRAME.system_phase) { + seed -= uint(1); + } + seed *= uint(params.total_particles); + seed += uint(index); + float random = float(hash(seed) 
% uint(65536)) / 65536.0; + restart_phase += FRAME.randomness * random * 1.0 / float(params.total_particles); + } + + restart_phase *= (1.0 - FRAME.explosiveness); + + if (FRAME.system_phase > FRAME.prev_system_phase) { + // restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed + + if (restart_phase >= FRAME.prev_system_phase && restart_phase < FRAME.system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (FRAME.system_phase - restart_phase) * params.lifetime; + } + } + + } else if (FRAME.delta > 0.0) { + if (restart_phase >= FRAME.prev_system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (1.0 - restart_phase + FRAME.system_phase) * params.lifetime; + } + + } else if (restart_phase < FRAME.system_phase) { + restart = true; + if (params.use_fractional_delta) { + local_delta = (FRAME.system_phase - restart_phase) * params.lifetime; + } + } + } + + uint current_cycle = FRAME.cycle; + + if (FRAME.system_phase < restart_phase) { + current_cycle -= uint(1); + } + + uint particle_number = current_cycle * uint(params.total_particles) + particle; + + if (restart) { + PARTICLE.is_active = FRAME.emitting; + restart_position = true; + restart_rotation_scale = true; + restart_velocity = true; + restart_color = true; + restart_custom = true; + } + } + + if (PARTICLE.is_active) { + /* clang-format off */ + +COMPUTE_SHADER_CODE + + /* clang-format on */ + } + +#if !defined(DISABLE_VELOCITY) + + if (PARTICLE.is_active) { + PARTICLE.xform[3].xyz += PARTICLE.velocity * local_delta; + } +#endif + +#if 0 + if (PARTICLE.is_active) { + //execute shader + + + + + //!defined(DISABLE_FORCE) + + if (false) { + vec3 force = vec3(0.0); + for (int i = 0; i < attractor_count; i++) { + vec3 rel_vec = xform[3].xyz - attractors[i].pos; + float dist = length(rel_vec); + if (attractors[i].radius < dist) + continue; + if (attractors[i].eat_radius > 0.0 && attractors[i].eat_radius > dist) { + 
out_velocity_active.a = 0.0; + } + + rel_vec = normalize(rel_vec); + + float attenuation = pow(dist / attractors[i].radius, attractors[i].attenuation); + + if (attractors[i].dir == vec3(0.0)) { + //towards center + force += attractors[i].strength * rel_vec * attenuation * mass; + } else { + force += attractors[i].strength * attractors[i].dir * attenuation * mass; + } + } + + out_velocity_active.xyz += force * local_delta; + } + +#if !defined(DISABLE_VELOCITY) + + if (true) { + xform[3].xyz += out_velocity_active.xyz * local_delta; + } +#endif + } else { + xform = mat4(0.0); + } + + + xform = transpose(xform); + + out_velocity_active.a = mix(0.0, 1.0, shader_active); + + out_xform_1 = xform[0]; + out_xform_2 = xform[1]; + out_xform_3 = xform[2]; +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl b/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl new file mode 100644 index 0000000000..6c782b6045 --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/particles_copy.glsl @@ -0,0 +1,82 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +struct ParticleData { + mat4 xform; + vec3 velocity; + bool is_active; + vec4 color; + vec4 custom; +}; + +layout(set = 0, binding = 1, std430) restrict readonly buffer Particles { + ParticleData data[]; +} +particles; + +layout(set = 0, binding = 2, std430) restrict writeonly buffer Transforms { + vec4 data[]; +} +instances; + +#ifdef USE_SORT_BUFFER + +layout(set = 1, binding = 0, std430) restrict buffer SortBuffer { + vec2 data[]; +} +sort_buffer; + +#endif // USE_SORT_BUFFER + +layout(push_constant, binding = 0, std430) uniform Params { + vec3 sort_direction; + uint total_particles; +} +params; + +void main() { +#ifdef MODE_FILL_SORT_BUFFER + + uint particle = gl_GlobalInvocationID.x; + if (particle >= params.total_particles) { + return; //discard + } + + sort_buffer.data[particle].x = dot(params.sort_direction, 
particles.data[particle].xform[3].xyz); + sort_buffer.data[particle].y = float(particle); +#endif + +#ifdef MODE_FILL_INSTANCES + + uint particle = gl_GlobalInvocationID.x; + uint write_offset = gl_GlobalInvocationID.x * (3 + 1 + 1); //xform + color + custom + + if (particle >= params.total_particles) { + return; //discard + } + +#ifdef USE_SORT_BUFFER + particle = uint(sort_buffer.data[particle].y); //use index from sort buffer +#endif + + mat4 txform; + + if (particles.data[particle].is_active) { + txform = transpose(particles.data[particle].xform); + } else { + txform = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); //zero scale, becomes invisible + } + + instances.data[write_offset + 0] = txform[0]; + instances.data[write_offset + 1] = txform[1]; + instances.data[write_offset + 2] = txform[2]; + instances.data[write_offset + 3] = particles.data[particle].color; + instances.data[write_offset + 4] = particles.data[particle].custom; + +#endif +} diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl index 5993e68317..2a7b73d9aa 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl @@ -681,9 +681,13 @@ LIGHT_SHADER_CODE #ifndef USE_NO_SHADOWS -// Produces cheap but low-quality white noise, nothing special +// Produces cheap white noise, optimized for window-space +// Comes from: https://www.shadertoy.com/view/4djSRW +// Copyright: Dave Hoskins, MIT License float quick_hash(vec2 pos) { - return fract(sin(dot(pos * 19.19, vec2(49.5791, 97.413))) * 49831.189237); + vec3 p3 = fract(vec3(pos.xyx) * .1031); + p3 += dot(p3, p3.yzx + 33.33); + return fract((p3.x + p3.y) * p3.z); } float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec4 coord) { diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl 
index d7d19897e3..dd0ca5c506 100644 --- a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl @@ -338,7 +338,7 @@ void main() { continue; //was not initialized yet, ignore } - float q_dist = distance(posf, vec3(p.xyz)); + float q_dist = distance(posf, vec3(q.xyz)); if (p.w == 0 || q_dist < p_dist) { p = q; //just replace because current is unused p_dist = q_dist; diff --git a/servers/rendering/rasterizer_rd/shaders/sort.glsl b/servers/rendering/rasterizer_rd/shaders/sort.glsl new file mode 100644 index 0000000000..e5ebb9c64b --- /dev/null +++ b/servers/rendering/rasterizer_rd/shaders/sort.glsl @@ -0,0 +1,203 @@ +#[compute] + +#version 450 + +VERSION_DEFINES + +// Original version here: +// https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11/blob/master/gpuparticles11/src/Shaders + +// +// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#define SORT_SIZE 512 +#define NUM_THREADS (SORT_SIZE / 2) +#define INVERSION (16 * 2 + 8 * 3) +#define ITERATIONS 1 + +layout(local_size_x = NUM_THREADS, local_size_y = 1, local_size_z = 1) in; + +#ifndef MODE_SORT_STEP + +shared vec2 g_LDS[SORT_SIZE]; + +#endif + +layout(set = 1, binding = 0, std430) restrict buffer SortBuffer { + vec2 data[]; +} +sort_buffer; + +layout(push_constant, binding = 0, std430) uniform Params { + uint total_elements; + uint pad[3]; + ivec4 job_params; +} +params; + +void main() { +#ifdef MODE_SORT_BLOCK + + uvec3 Gid = gl_WorkGroupID; + uvec3 DTid = gl_GlobalInvocationID; + uvec3 GTid = gl_LocalInvocationID; + uint GI = gl_LocalInvocationIndex; + + int GlobalBaseIndex = int((Gid.x * SORT_SIZE) + GTid.x); + int LocalBaseIndex = int(GI); + int numElementsInThreadGroup = int(min(SORT_SIZE, params.total_elements - (Gid.x * SORT_SIZE))); + + // Load shared data + + int i; + for (i = 0; i < 2 * ITERATIONS; ++i) { + if (GI + i * NUM_THREADS < numElementsInThreadGroup) + g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS]; + } + + groupMemoryBarrier(); + barrier(); + + // Bitonic sort + for (int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2) { + for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) { + for (i = 0; i < ITERATIONS; ++i) { + int tmp_index = int(GI + NUM_THREADS * i); + int index_low = tmp_index & (nMergeSubSize - 1); + int index_high = 2 * (tmp_index - index_low); + int index = index_high + index_low; + + int nSwapElem = nMergeSubSize == nMergeSize >> 1 ? 
index_high + (2 * nMergeSubSize - 1) - index_low : index_high + nMergeSubSize + index_low; + if (nSwapElem < numElementsInThreadGroup) { + vec2 a = g_LDS[index]; + vec2 b = g_LDS[nSwapElem]; + + if (a.x > b.x) { + g_LDS[index] = b; + g_LDS[nSwapElem] = a; + } + } + groupMemoryBarrier(); + barrier(); + } + } + } + + // Store shared data + for (i = 0; i < 2 * ITERATIONS; ++i) { + if (GI + i * NUM_THREADS < numElementsInThreadGroup) { + sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS]; + } + } + +#endif + +#ifdef MODE_SORT_STEP + + uvec3 Gid = gl_WorkGroupID; + uvec3 GTid = gl_LocalInvocationID; + + ivec4 tgp; + + tgp.x = int(Gid.x) * 256; + tgp.y = 0; + tgp.z = int(params.total_elements); + tgp.w = min(512, max(0, tgp.z - int(Gid.x) * 512)); + + uint localID = int(tgp.x) + GTid.x; // calculate threadID within this sortable-array + + uint index_low = localID & (params.job_params.x - 1); + uint index_high = 2 * (localID - index_low); + + uint index = tgp.y + index_high + index_low; + uint nSwapElem = tgp.y + index_high + params.job_params.y + params.job_params.z * index_low; + + if (nSwapElem < tgp.y + tgp.z) { + vec2 a = sort_buffer.data[index]; + vec2 b = sort_buffer.data[nSwapElem]; + + if (a.x > b.x) { + sort_buffer.data[index] = b; + sort_buffer.data[nSwapElem] = a; + } + } + +#endif + +#ifdef MODE_SORT_INNER + + uvec3 Gid = gl_WorkGroupID; + uvec3 DTid = gl_GlobalInvocationID; + uvec3 GTid = gl_LocalInvocationID; + uint GI = gl_LocalInvocationIndex; + + ivec4 tgp; + + tgp.x = int(Gid.x * 256); + tgp.y = 0; + tgp.z = int(params.total_elements.x); + tgp.w = int(min(512, max(0, params.total_elements - Gid.x * 512))); + + int GlobalBaseIndex = int(tgp.y + tgp.x * 2 + GTid.x); + int LocalBaseIndex = int(GI); + int i; + + // Load shared data + for (i = 0; i < 2; ++i) { + if (GI + i * NUM_THREADS < tgp.w) + g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS]; + } + + 
groupMemoryBarrier(); + barrier(); + + // sort threadgroup shared memory + for (int nMergeSubSize = SORT_SIZE >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) { + int tmp_index = int(GI); + int index_low = tmp_index & (nMergeSubSize - 1); + int index_high = 2 * (tmp_index - index_low); + int index = index_high + index_low; + + int nSwapElem = index_high + nMergeSubSize + index_low; + + if (nSwapElem < tgp.w) { + vec2 a = g_LDS[index]; + vec2 b = g_LDS[nSwapElem]; + + if (a.x > b.x) { + g_LDS[index] = b; + g_LDS[nSwapElem] = a; + } + } + groupMemoryBarrier(); + barrier(); + } + + // Store shared data + for (i = 0; i < 2; ++i) { + if (GI + i * NUM_THREADS < tgp.w) { + sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS]; + } + } + +#endif +} |