diff options
-rw-r--r-- | doc/classes/ProjectSettings.xml | 2 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 105 | ||||
-rw-r--r-- | main/main.cpp | 3 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp | 26 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/renderer_scene_render_rd.cpp | 1 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/shaders/light_data_inc.glsl | 20 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl | 34 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl | 16 |
8 files changed, 140 insertions, 67 deletions
diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 6eba469e54..fb8776430c 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -423,6 +423,8 @@ <member name="debug/settings/stdout/print_fps" type="bool" setter="" getter="" default="false"> Print frames per second to standard output every second. </member> + <member name="debug/settings/stdout/print_gpu_profile" type="bool" setter="" getter="" default="false"> + </member> <member name="debug/settings/stdout/verbose_stdout" type="bool" setter="" getter="" default="false"> Print more information to standard output when running. It displays information such as memory leaks, which scenes and resources are being loaded, etc. </member> diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index b5bd6cef38..38bb023f83 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -3299,13 +3299,25 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF // Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution synchronization vs. // the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that // stage + switch (is_depth ? p_initial_depth_action : p_initial_action) { case INITIAL_ACTION_CLEAR_REGION: case INITIAL_ACTION_CLEAR: { - description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there - dependency_from_external.srcStageMask |= reading_stages; + if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + dependency_from_external.srcStageMask |= reading_stages; + } else { + description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; + } } break; case INITIAL_ACTION_KEEP: { if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { @@ -3363,7 +3375,58 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF } } - switch (is_depth ? p_final_depth_action : p_final_action) { + bool used_last = false; + + { + int last_pass = p_passes.size() - 1; + + if (is_depth) { + //likely missing depth resolve? + if (p_passes[last_pass].depth_attachment == i) { + used_last = true; + } + } else { + if (p_passes[last_pass].resolve_attachments.size()) { + //if using resolve attachments, check resolve attachments + for (int j = 0; j < p_passes[last_pass].resolve_attachments.size(); j++) { + if (p_passes[last_pass].resolve_attachments[j] == i) { + used_last = true; + break; + } + } + } else { + for (int j = 0; j < p_passes[last_pass].color_attachments.size(); j++) { + if (p_passes[last_pass].color_attachments[j] == i) { + used_last = true; + break; + } + } + } + } + + if (!used_last) { + for (int j = 0; j < p_passes[last_pass].preserve_attachments.size(); j++) { + if (p_passes[last_pass].preserve_attachments[j] == i) { + used_last = true; + break; + } + } + } + } + + FinalAction final_action = p_final_action; + FinalAction final_depth_action = p_final_depth_action; + + if (!used_last) { + if (is_depth) { + final_depth_action = FINAL_ACTION_DISCARD; + + } else { + final_action = FINAL_ACTION_DISCARD; + } + } + + switch (is_depth ? final_depth_action : final_action) { case FINAL_ACTION_READ: { if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; @@ -3516,21 +3579,6 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF resolve_references.push_back(reference); } - LocalVector<uint32_t> &preserve_references = preserve_reference_array[i]; - - for (int j = 0; j < pass->preserve_attachments.size(); j++) { - int32_t attachment = pass->preserve_attachments[j]; - - ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused."); - - ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ")."); - ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass."); - - attachment_last_pass[attachment] = i; - - preserve_references.push_back(attachment); - } - VkAttachmentReference &depth_stencil_reference = depth_reference_array[i]; if (pass->depth_attachment != FramebufferPass::ATTACHMENT_UNUSED) { @@ -3554,6 +3602,22 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF depth_stencil_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED; } + LocalVector<uint32_t> &preserve_references = preserve_reference_array[i]; + + for (int j = 0; j < pass->preserve_attachments.size(); j++) { + int32_t attachment = pass->preserve_attachments[j]; + + ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused."); + + ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ")."); + + if (attachment_last_pass[attachment] != i) { + //preserve can still be used to keep depth or color from being discarded after use + attachment_last_pass[attachment] = i; + preserve_references.push_back(attachment); + } + } + VkSubpassDescription &subpass = subpasses[i]; subpass.flags = 0; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; @@ -8864,6 +8928,7 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) { } void RenderingDeviceVulkan::capture_timestamp(const String &p_name) { + ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. Offending timestap was: " + p_name); ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); //this should be optional for profiling, else it will slow things down diff --git a/main/main.cpp b/main/main.cpp index d91cc5c9bf..8bf5433bbf 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -1352,6 +1352,7 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph PROPERTY_HINT_RANGE, "0,1000,1")); GLOBAL_DEF("debug/settings/stdout/print_fps", false); + GLOBAL_DEF("debug/settings/stdout/print_gpu_profile", false); GLOBAL_DEF("debug/settings/stdout/verbose_stdout", false); if (!OS::get_singleton()->_verbose_stdout) { // Not manually overridden. @@ -1591,7 +1592,7 @@ Error Main::setup2(Thread::ID p_main_tid_override) { rendering_server->init(); rendering_server->set_render_loop_enabled(!disable_render_loop); - if (profile_gpu) { + if (profile_gpu || (!editor && bool(GLOBAL_GET("debug/settings/stdout/print_gpu_profile")))) { rendering_server->set_print_gpu_profile(true); } diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index dea2975660..f810ae2454 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -519,7 +519,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color using_subpass_post_process = false; } - if (scene_state.used_screen_texture || scene_state.used_depth_texture) { + if (using_ssr || using_sss || scene_state.used_screen_texture || scene_state.used_depth_texture) { // can't use our last two subpasses using_subpass_transparent = false; using_subpass_post_process = false; @@ -679,17 +679,20 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color _setup_environment(p_render_data, p_render_data->reflection_probe.is_valid(), screen_size, !p_render_data->reflection_probe.is_valid(), p_default_bg_color, p_render_data->render_buffers.is_valid()); - RENDER_TIMESTAMP("Render Opaque Subpass"); + if (using_subpass_transparent && using_subpass_post_process) { + RENDER_TIMESTAMP("Render Opaque + Transparent + Tonemap"); + } else if (using_subpass_transparent) { + RENDER_TIMESTAMP("Render Opaque + Transparent"); + } else { + RENDER_TIMESTAMP("Render Opaque"); + } RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_data, radiance_texture, true); - bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss; - bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss; + bool can_continue_color = !using_subpass_transparent && !scene_state.used_screen_texture && !using_ssr && !using_sss; + bool can_continue_depth = !using_subpass_transparent && !scene_state.used_depth_texture && !using_ssr && !using_sss; { - bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); - bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only); - // regular forward for now Vector<Color> c; c.push_back(clear_color.to_linear()); // our render buffer @@ -709,11 +712,11 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color // secondary command buffers need more testing at this time //multi threaded thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); - RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); + RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params); } else { //single threaded - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); _render_list(draw_list, fb_format, &render_list_params, 0, render_list_params.element_count); } } @@ -721,8 +724,6 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color RD::get_singleton()->draw_command_end_label(); //Render Opaque Subpass if (draw_sky || draw_sky_fog_only) { - RENDER_TIMESTAMP("Render Sky Subpass"); - RD::get_singleton()->draw_command_begin_label("Draw Sky Subpass"); RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass(); @@ -752,7 +753,6 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color } // transparent pass - RENDER_TIMESTAMP("Render Transparent Subpass"); RD::get_singleton()->draw_command_begin_label("Render Transparent Subpass"); @@ -789,6 +789,8 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL); } else { + RENDER_TIMESTAMP("Render Transparent"); + framebuffer = render_buffer->color_fbs[FB_CONFIG_ONE_PASS]; // this may be needed if we re-introduced steps that change info, not sure which do so in the previous implementation diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 5b734fc603..fd797b1cab 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -2010,7 +2010,6 @@ void RendererSceneRenderRD::_post_process_subpass(RID p_source_texture, RID p_fr bool can_use_effects = rb->width >= 8 && rb->height >= 8; - RENDER_TIMESTAMP("Tonemap"); RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass(); EffectsRD::TonemapSettings tonemap; diff --git a/servers/rendering/renderer_rd/shaders/light_data_inc.glsl b/servers/rendering/renderer_rd/shaders/light_data_inc.glsl index 2fce258cff..9155216d7e 100644 --- a/servers/rendering/renderer_rd/shaders/light_data_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/light_data_inc.glsl @@ -3,7 +3,7 @@ #define LIGHT_BAKE_STATIC 2 struct LightData { //this structure needs to be as packed as possible - vec3 position; + highp vec3 position; float inv_radius; vec3 direction; @@ -17,8 +17,8 @@ struct LightData { //this structure needs to be as packed as possible float specular_amount; bool shadow_enabled; - vec4 atlas_rect; // rect in the shadow atlas - mat4 shadow_matrix; + highp vec4 atlas_rect; // rect in the shadow atlas + highp mat4 shadow_matrix; float shadow_bias; float shadow_normal_bias; float transmittance_bias; @@ -27,7 +27,7 @@ struct LightData { //this structure needs to be as packed as possible uint mask; float shadow_volumetric_fog_fade; uint bake_mode; - vec4 projector_rect; //projector rect in srgb decal atlas + highp vec4 projector_rect; //projector rect in srgb decal atlas }; #define REFLECTION_AMBIENT_DISABLED 0 @@ -69,13 +69,13 @@ struct DirectionalLightData { vec4 shadow_bias; vec4 shadow_normal_bias; vec4 shadow_transmittance_bias; - vec4 shadow_z_range; - vec4 shadow_range_begin; + highp vec4 shadow_z_range; + highp vec4 shadow_range_begin; vec4 shadow_split_offsets; - mat4 shadow_matrix1; - mat4 shadow_matrix2; - mat4 shadow_matrix3; - mat4 shadow_matrix4; + highp mat4 shadow_matrix1; + highp mat4 shadow_matrix2; + highp mat4 shadow_matrix3; + highp mat4 shadow_matrix4; vec4 shadow_color1; vec4 shadow_color2; vec4 shadow_color3; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl index 681656e1d5..2babe92c1c 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl @@ -59,27 +59,27 @@ layout(location = 11) in vec4 weight_attrib; /* Varyings */ -layout(location = 0) out vec3 vertex_interp; +layout(location = 0) highp out vec3 vertex_interp; #ifdef NORMAL_USED -layout(location = 1) out vec3 normal_interp; +layout(location = 1) mediump out vec3 normal_interp; #endif #if defined(COLOR_USED) -layout(location = 2) out vec4 color_interp; +layout(location = 2) mediump out vec4 color_interp; #endif #ifdef UV_USED -layout(location = 3) out vec2 uv_interp; +layout(location = 3) mediump out vec2 uv_interp; #endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) -layout(location = 4) out vec2 uv2_interp; +layout(location = 4) mediump out vec2 uv2_interp; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) -layout(location = 5) out vec3 tangent_interp; -layout(location = 6) out vec3 binormal_interp; +layout(location = 5) mediump out vec3 tangent_interp; +layout(location = 6) mediump out vec3 binormal_interp; #endif #ifdef MATERIAL_UNIFORMS_USED @@ -370,6 +370,10 @@ void main() { #VERSION_DEFINES +//use medium precision for floats on mobile. + +precision mediump float; + /* Specialization Constants */ /* Specialization Constants (Toggles) */ @@ -395,32 +399,32 @@ layout(constant_id = 11) const bool sc_projector_use_mipmaps = true; /* Varyings */ -layout(location = 0) in vec3 vertex_interp; +layout(location = 0) highp in vec3 vertex_interp; #ifdef NORMAL_USED -layout(location = 1) in vec3 normal_interp; +layout(location = 1) mediump in vec3 normal_interp; #endif #if defined(COLOR_USED) -layout(location = 2) in vec4 color_interp; +layout(location = 2) mediump in vec4 color_interp; #endif #ifdef UV_USED -layout(location = 3) in vec2 uv_interp; +layout(location = 3) mediump in vec2 uv_interp; #endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) -layout(location = 4) in vec2 uv2_interp; +layout(location = 4) mediump in vec2 uv2_interp; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) -layout(location = 5) in vec3 tangent_interp; -layout(location = 6) in vec3 binormal_interp; +layout(location = 5) mediump in vec3 tangent_interp; +layout(location = 6) mediump in vec3 binormal_interp; #endif #ifdef MODE_DUAL_PARABOLOID -layout(location = 8) in float dp_clip; +layout(location = 8) highp in float dp_clip; #endif diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl index d9682d7b23..f1e554d01c 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl @@ -16,12 +16,12 @@ /* don't exceed 128 bytes!! */ /* put instance data into our push content, not a array */ layout(push_constant, binding = 0, std430) uniform DrawCall { - mat4 transform; // 64 - 64 + highp mat4 transform; // 64 - 64 uint flags; // 04 - 68 uint instance_uniforms_ofs; //base offset in global buffer for instance variables // 04 - 72 uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) // 04 - 76 uint layer_mask; // 04 - 80 - vec4 lightmap_uv_scale; // 16 - 96 doubles as uv_offset when needed + highp vec4 lightmap_uv_scale; // 16 - 96 doubles as uv_offset when needed uvec2 reflection_probes; // 08 - 104 uvec2 omni_lights; // 08 - 112 @@ -126,14 +126,14 @@ global_variables; /* Set 1: Render Pass (changes per render pass) */ layout(set = 1, binding = 0, std140) uniform SceneData { - mat4 projection_matrix; - mat4 inv_projection_matrix; - mat4 camera_matrix; - mat4 inv_camera_matrix; + highp mat4 projection_matrix; + highp mat4 inv_projection_matrix; + highp mat4 camera_matrix; + highp mat4 inv_camera_matrix; // only used for multiview - mat4 projection_matrix_view[MAX_VIEWS]; - mat4 inv_projection_matrix_view[MAX_VIEWS]; + highp mat4 projection_matrix_view[MAX_VIEWS]; + highp mat4 inv_projection_matrix_view[MAX_VIEWS]; vec2 viewport_size; vec2 screen_pixel_size; |