diff options
author | Juan Linietsky <reduzio@gmail.com> | 2019-09-25 16:44:44 -0300 |
---|---|---|
committer | Juan Linietsky <reduzio@gmail.com> | 2020-02-11 12:02:34 +0100 |
commit | 263bebe0237b85b1343ba17b117c8c43287ecc57 (patch) | |
tree | ca2d803d0e1e3ec664bb0988d6a7896f6f55e8b2 | |
parent | f55332ffad4622b3da5d6ebcd0806d3ce37465f1 (diff) |
Untested support for compute shaders
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 553 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.h | 112 | ||||
-rw-r--r-- | editor/spatial_editor_gizmos.cpp | 3 | ||||
-rw-r--r-- | gles_builders.py | 60 | ||||
-rw-r--r-- | scene/3d/skeleton.cpp | 4 | ||||
-rw-r--r-- | servers/visual/rasterizer_rd/rasterizer_scene_rd.cpp | 9 | ||||
-rw-r--r-- | servers/visual/rasterizer_rd/rasterizer_scene_rd.h | 7 | ||||
-rw-r--r-- | servers/visual/rasterizer_rd/rasterizer_storage_rd.h | 2 | ||||
-rw-r--r-- | servers/visual/rasterizer_rd/shader_rd.cpp | 121 | ||||
-rw-r--r-- | servers/visual/rasterizer_rd/shader_rd.h | 13 | ||||
-rw-r--r-- | servers/visual/rasterizer_rd/shaders/SCsub | 1 | ||||
-rw-r--r-- | servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl | 241 | ||||
-rw-r--r-- | servers/visual/rendering_device.h | 20 |
13 files changed, 1056 insertions, 90 deletions
diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index dd638bb7ba..7e9d11137e 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -1604,6 +1604,10 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; } + if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) { + image_create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } + if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } @@ -1723,39 +1727,41 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T texture.samples = p_format.samples; texture.allowed_shared_formats = p_format.shareable_formats; - //set bound and unbound layouts - if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + //set base layout based on usage priority - texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; - texture.barrier_aspect_mask = texture.read_aspect_mask; - if (format_has_stencil(p_format.format)) { - texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; - } + if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) { + //first priority, readable + texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) { - texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } else { - texture.unbound_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - } - texture.bound_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } else if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) { + //second priority, storage + + texture.layout = VK_IMAGE_LAYOUT_GENERAL; } else if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + //third priority, color or depth - texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT; - texture.barrier_aspect_mask = 
texture.read_aspect_mask; + texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) { - texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } else { - texture.unbound_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } else if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + + texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + } else { + texture.layout = VK_IMAGE_LAYOUT_GENERAL; + } + + if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + + texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; + texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; + + if (format_has_stencil(p_format.format)) { + texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; } - texture.bound_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } else { texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT; - texture.barrier_aspect_mask = texture.read_aspect_mask; - - texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - texture.bound_layout = VK_IMAGE_LAYOUT_UNDEFINED; //will never be bound + texture.barrier_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT; } texture.bound = false; @@ -1825,7 +1831,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_memory_barrier.srcAccessMask = 0; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_memory_barrier.newLayout = texture.unbound_layout; + image_memory_barrier.newLayout = texture.layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.image = texture.image; @@ -1835,7 +1841,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_memory_barrier.subresourceRange.baseArrayLayer = 0; image_memory_barrier.subresourceRange.layerCount = 
image_create_info.arrayLayers; - vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); + vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); } RID id = texture_owner.make_rid(texture); @@ -2081,7 +2087,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.pNext = NULL; image_memory_barrier.srcAccessMask = 0; image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.oldLayout = texture->unbound_layout; + image_memory_barrier.oldLayout = texture->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2221,7 +2227,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - image_memory_barrier.newLayout = texture->unbound_layout; + image_memory_barrier.newLayout = texture->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.image = texture->image; @@ -2231,7 +2237,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, 
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); } return OK; @@ -2368,7 +2374,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3 image_memory_barrier.pNext = NULL; image_memory_barrier.srcAccessMask = 0; image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_memory_barrier.oldLayout = tex->unbound_layout; + image_memory_barrier.oldLayout = tex->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2446,7 +2452,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3 image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - image_memory_barrier.newLayout = tex->unbound_layout; + image_memory_barrier.newLayout = tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.image = tex->image; @@ -2456,7 +2462,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3 image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); } { //make dst readable @@ -2559,7 +2565,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.pNext = NULL; 
image_memory_barrier.srcAccessMask = 0; image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_memory_barrier.oldLayout = src_tex->unbound_layout; + image_memory_barrier.oldLayout = src_tex->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2579,7 +2585,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.pNext = NULL; image_memory_barrier.srcAccessMask = 0; image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.oldLayout = dst_tex->unbound_layout; + image_memory_barrier.oldLayout = dst_tex->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2631,7 +2637,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - image_memory_barrier.newLayout = src_tex->unbound_layout; + image_memory_barrier.newLayout = src_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.image = src_tex->image; @@ -2641,7 +2647,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, 
&image_memory_barrier); } { //make dst readable @@ -2652,7 +2658,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - image_memory_barrier.newLayout = dst_tex->unbound_layout; + image_memory_barrier.newLayout = dst_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2663,7 +2669,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier); } } @@ -2733,7 +2739,8 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF ERR_FAIL_COND_V_MSG(!(p_format[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)), VK_NULL_HANDLE, "Texture format for index (" + itos(i) + ") requires an attachment (depth, stencil or resolve) bit set."); - bool can_be_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT; + bool is_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT; + bool is_storage = p_format[i].usage_flags & TEXTURE_USAGE_STORAGE_BIT; switch (p_initial_action) { @@ -2745,7 +2752,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF case INITIAL_ACTION_KEEP_COLOR: { if (p_format[i].usage_flags & 
TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; @@ -2761,10 +2768,10 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; @@ -2799,12 +2806,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; @@ -2815,12 +2822,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; @@ -2831,12 +2838,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; @@ -3531,8 +3538,15 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages uint32_t stages_processed = 0; + bool is_compute = false; + for (int i = 0; i < p_stages.size(); i++) { + if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) { + is_compute = true; + ERR_FAIL_COND_V_MSG(p_stages.size() != 1, RID(), + "Compute shaders can only receive one stage, dedicated to compute."); + } ERR_FAIL_COND_V_MSG(stages_processed & (1 << p_stages[i].shader_stage), RID(), "Stage " + String(shader_stage_names[p_stages[i].shader_stage]) + " submitted more than once."); @@ -3793,6 +3807,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages shader.vertex_input_mask = vertex_input_mask; shader.fragment_outputs = fragment_outputs; shader.push_constant = push_constant; + shader.is_compute = is_compute; String error_text; @@ -4166,6 +4181,8 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, List<Vector<VkDescriptorImageInfo> > image_infos; //used for verification to make sure a uniform set does not use a framebuffer bound texture Vector<RID> attachable_textures; + Vector<Texture *> mutable_sampled_textures; + Vector<Texture *> mutable_storage_textures; for (uint32_t i = 0; i < set_uniform_count; i++) { const UniformInfo &set_uniform = set_uniforms[i]; @@ -4259,9 +4276,14 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen } - img_info.imageLayout = texture->unbound_layout; + img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; image_info.push_back(img_info); + + if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + //can also be used as storage, add to mutable sampled + 
mutable_sampled_textures.push_back(texture); + } } write.dstArrayElement = 0; @@ -4306,9 +4328,14 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen } - img_info.imageLayout = texture->unbound_layout; + img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; image_info.push_back(img_info); + + if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + //can also be used as storage, add to mutable sampled + mutable_sampled_textures.push_back(texture); + } } write.dstArrayElement = 0; @@ -4321,7 +4348,54 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, type_size = uniform.ids.size(); } break; case UNIFORM_TYPE_IMAGE: { - //todo + + if (uniform.ids.size() != set_uniform.length) { + if (set_uniform.length > 1) { + ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ")."); + } else { + ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.ids.size()) + ")."); + } + } + + Vector<VkDescriptorImageInfo> image_info; + + for (int j = 0; j < uniform.ids.size(); j++) { + Texture *texture = texture_owner.getornull(uniform.ids[j]); + + ERR_FAIL_COND_V_MSG(!texture, RID(), + "Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture."); + + ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(), + "Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform."); + + VkDescriptorImageInfo img_info; + img_info.sampler = NULL; + img_info.imageView = texture->view; + + if (texture->owner.is_valid()) { + texture = 
texture_owner.getornull(texture->owner); + ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen + } + + img_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + image_info.push_back(img_info); + + if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) { + //can also be used as storage, add to mutable sampled + mutable_storage_textures.push_back(texture); + } + } + + write.dstArrayElement = 0; + write.descriptorCount = uniform.ids.size(); + write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write.pImageInfo = image_infos.push_back(image_info)->get().ptr(); + write.pBufferInfo = NULL; + write.pTexelBufferView = NULL; + + type_size = uniform.ids.size(); + } break; case UNIFORM_TYPE_TEXTURE_BUFFER: { if (uniform.ids.size() != set_uniform.length) { @@ -4476,6 +4550,8 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, uniform_set.descriptor_set = descriptor_set; uniform_set.format = shader->set_formats[p_shader_set]; uniform_set.attachable_textures = attachable_textures; + uniform_set.mutable_sampled_textures = mutable_sampled_textures; + uniform_set.mutable_storage_textures = mutable_storage_textures; uniform_set.shader_set = p_shader_set; uniform_set.shader_id = p_shader; @@ -4651,6 +4727,9 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma Shader *shader = shader_owner.getornull(p_shader); ERR_FAIL_COND_V(!shader, RID()); + ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), + "Compute shaders can't be used in render pipelines"); + if (p_framebuffer_format == INVALID_ID) { //if nothing provided, use an empty one (no attachments) p_framebuffer_format = framebuffer_format_create(Vector<AttachmentFormat>()); @@ -4996,7 +5075,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma pipeline.validation.primitive_minimum = primitive_minimum[p_render_primitive]; #endif //create ID to associate with this pipeline - RID id = pipeline_owner.make_rid(pipeline); + RID id = 
render_pipeline_owner.make_rid(pipeline); //now add aall the dependencies _add_dependency(id, p_shader); return id; @@ -5004,7 +5083,55 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma bool RenderingDeviceVulkan::render_pipeline_is_valid(RID p_pipeline) { _THREAD_SAFE_METHOD_ - return pipeline_owner.owns(p_pipeline); + return render_pipeline_owner.owns(p_pipeline); +} + +/**************************/ +/**** COMPUTE PIPELINE ****/ +/**************************/ + +RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) { + _THREAD_SAFE_METHOD_ + + //needs a shader + Shader *shader = shader_owner.getornull(p_shader); + ERR_FAIL_COND_V(!shader, RID()); + + ERR_FAIL_COND_V_MSG(!shader->is_compute, RID(), + "Non-compute shaders can't be used in compute pipelines"); + + //finally, pipeline create info + VkComputePipelineCreateInfo compute_pipeline_create_info; + + compute_pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + compute_pipeline_create_info.pNext = NULL; + compute_pipeline_create_info.flags = 0; + + compute_pipeline_create_info.stage = shader->pipeline_stages[0]; + compute_pipeline_create_info.layout = shader->pipeline_layout; + compute_pipeline_create_info.basePipelineHandle = NULL; + compute_pipeline_create_info.basePipelineIndex = 0; + + ComputePipeline pipeline; + VkResult err = vkCreateComputePipelines(device, NULL, 1, &compute_pipeline_create_info, NULL, &pipeline.pipeline); + ERR_FAIL_COND_V(err, RID()); + + pipeline.set_formats = shader->set_formats; + pipeline.push_constant_stages = shader->push_constant.push_constants_vk_stage; + pipeline.pipeline_layout = shader->pipeline_layout; + pipeline.shader = p_shader; + pipeline.push_constant_size = shader->push_constant.push_constant_size; + + //create ID to associate with this pipeline + RID id = compute_pipeline_owner.make_rid(pipeline); + //now add aall the dependencies + _add_dependency(id, p_shader); + return id; +} + +bool 
RenderingDeviceVulkan::compute_pipeline_is_valid(RID p_pipeline) { + + return compute_pipeline_owner.owns(p_pipeline); } /****************/ @@ -5055,6 +5182,8 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(in _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time."); + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; draw_list = memnew(DrawList); draw_list->command_buffer = command_buffer; @@ -5256,6 +5385,9 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu _THREAD_SAFE_METHOD_ + ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time."); + Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer); ERR_FAIL_COND_V(!framebuffer, INVALID_ID); @@ -5530,7 +5662,7 @@ void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RI ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); #endif - const RenderPipeline *pipeline = pipeline_owner.getornull(p_render_pipeline); + const RenderPipeline *pipeline = render_pipeline_owner.getornull(p_render_pipeline); ERR_FAIL_COND(!pipeline); #ifdef DEBUG_ENABLED ERR_FAIL_COND(pipeline->validation.framebuffer_format != dl->validation.framebuffer_format); @@ -5887,6 +6019,284 @@ void RenderingDeviceVulkan::draw_list_end() { // * Another render pass happens (since we may be done _memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT, 
true); } + +/***********************/ +/**** COMPUTE LISTS ****/ +/***********************/ + +RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() { + + ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time."); + + compute_list = memnew(ComputeList); + compute_list->command_buffer = frames[frame].draw_command_buffer; + + return ID_TYPE_COMPUTE_LIST; +} + +void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) { + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); + ERR_FAIL_COND(!compute_list); + + ComputeList *cl = compute_list; + + const ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_compute_pipeline); + ERR_FAIL_COND(!pipeline); + + if (p_compute_pipeline == cl->state.pipeline) { + return; //redundant state, return. + } + + cl->state.pipeline = p_compute_pipeline; + cl->state.pipeline_layout = pipeline->pipeline_layout; + + vkCmdBindPipeline(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); + + if (cl->state.pipeline_shader != pipeline->shader) { + // shader changed, so descriptor sets may become incompatible. 
+
+		//go through ALL sets, and unbind them (and all those above) if the format is different
+
+		uint32_t pcount = pipeline->set_formats.size(); //formats count in this pipeline
+		cl->state.set_count = MAX(cl->state.set_count, pcount);
+		const uint32_t *pformats = pipeline->set_formats.ptr(); //pipeline set formats
+
+		bool sets_valid = true; //once invalid, all above become invalid
+		for (uint32_t i = 0; i < pcount; i++) {
+			//if a part of the format is different, invalidate it (and the rest)
+			if (!sets_valid || cl->state.sets[i].pipeline_expected_format != pformats[i]) {
+				cl->state.sets[i].bound = false;
+				cl->state.sets[i].pipeline_expected_format = pformats[i];
+				sets_valid = false;
+			}
+		}
+
+		for (uint32_t i = pcount; i < cl->state.set_count; i++) {
+			//unbind the ones above (not used) if exist
+			cl->state.sets[i].bound = false;
+		}
+
+		cl->state.set_count = pcount; //update set count
+
+		if (pipeline->push_constant_size) {
+			cl->state.pipeline_push_constant_stages = pipeline->push_constant_stages;
+#ifdef DEBUG_ENABLED
+			cl->validation.pipeline_push_constant_suppplied = false;
+#endif
+		}
+	}
+
+#ifdef DEBUG_ENABLED
+	//update compute pass pipeline info
+	cl->validation.pipeline_active = true;
+	cl->validation.pipeline_push_constant_size = pipeline->push_constant_size;
+#endif
+}
+// Records p_uniform_set as the descriptor set for index p_index of the open
+// compute list and flags it as needing a bind at the next dispatch.
+//
+// Textures the uniform set lists as mutable-sampled / mutable-storage are moved
+// (with an image memory barrier) to SHADER_READ_ONLY_OPTIMAL / GENERAL when they
+// are not already in that layout. Textures moved to GENERAL are remembered in
+// state.textures_to_sampled_layout so compute_list_end() can move them back.
+void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) {
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *cl = compute_list;
+
+#ifdef DEBUG_ENABLED
+	// p_index is used to index state.sets[], so MAX_UNIFORM_SETS itself is already out of range.
+	ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS,
+			"Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ").");
+#endif
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
+#endif
+
+	UniformSet *uniform_set = uniform_set_owner.getornull(p_uniform_set);
+	ERR_FAIL_COND(!uniform_set);
+
+	if (p_index > cl->state.set_count) {
+		cl->state.set_count = p_index;
+	}
+
+	cl->state.sets[p_index].descriptor_set = uniform_set->descriptor_set; //update set pointer
+	cl->state.sets[p_index].bound = false; //needs rebind
+	cl->state.sets[p_index].uniform_set_format = uniform_set->format;
+	cl->state.sets[p_index].uniform_set = p_uniform_set;
+
+	uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size();
+	Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw();
+
+	for (uint32_t i = 0; i < textures_to_sampled_count; i++) {
+		if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
+
+			VkImageMemoryBarrier image_memory_barrier;
+			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+			image_memory_barrier.pNext = NULL;
+			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.oldLayout = textures_to_sampled[i]->layout;
+			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.image = textures_to_sampled[i]->image;
+			image_memory_barrier.subresourceRange.aspectMask = textures_to_sampled[i]->read_aspect_mask;
+			image_memory_barrier.subresourceRange.baseMipLevel = 0;
+			image_memory_barrier.subresourceRange.levelCount = textures_to_sampled[i]->mipmaps;
+			image_memory_barrier.subresourceRange.baseArrayLayer = 0;
+			image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers;
+
+			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+
+			textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+			// already back in the sampled layout, nothing left to restore at list end
+			cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]);
+		}
+	}
+
+	uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
+	Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw();
+
+	for (uint32_t i = 0; i < textures_to_storage_count; i++) {
+		if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) {
+
+			VkImageMemoryBarrier image_memory_barrier;
+			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+			image_memory_barrier.pNext = NULL;
+			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.oldLayout = textures_to_storage[i]->layout;
+			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.image = textures_to_storage[i]->image;
+			// the aspect mask must come from the storage texture being transitioned
+			// (the sampled array is unrelated and may be shorter than this one)
+			image_memory_barrier.subresourceRange.aspectMask = textures_to_storage[i]->read_aspect_mask;
+			image_memory_barrier.subresourceRange.baseMipLevel = 0;
+			image_memory_barrier.subresourceRange.levelCount = textures_to_storage[i]->mipmaps;
+			image_memory_barrier.subresourceRange.baseArrayLayer = 0;
+			image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers;
+
+			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+
+			textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL;
+
+			cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards
+		}
+	}
+
+#if 0
+	{ //validate that textures bound are not attached as framebuffer bindings
+		uint32_t attachable_count = uniform_set->attachable_textures.size();
+		const RID *attachable_ptr = uniform_set->attachable_textures.ptr();
+		uint32_t bound_count = draw_list_bound_textures.size();
+		const RID *bound_ptr = draw_list_bound_textures.ptr();
+		for (uint32_t i = 0; i < attachable_count; i++) {
+			for (uint32_t j = 0; j < bound_count; j++) {
+				ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j],
+						"Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed.");
+			}
+		}
+	}
+#endif
+}
+// Records a push constant update of p_data_size bytes (offset 0) from p_data,
+// using the pipeline layout and stage flags stored in the list state.
+// Debug builds reject a size that differs from the bound pipeline's push
+// constant size and remember that the constant was supplied.
+void RenderingDeviceVulkan::compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size) {
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *cl = compute_list;
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
+#endif
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(p_data_size != cl->validation.pipeline_push_constant_size,
+			"This compute pipeline requires (" + itos(cl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")");
+#endif
+	vkCmdPushConstants(cl->command_buffer, cl->state.pipeline_layout, cl->state.pipeline_push_constant_stages, 0, p_data_size, p_data);
+#ifdef DEBUG_ENABLED
+	cl->validation.pipeline_push_constant_suppplied = true;
+#endif
+}
+// Binds every descriptor set of the list that is expected by the pipeline and
+// not yet bound, then records a dispatch of p_x_groups * p_y_groups * p_z_groups
+// work groups.
+// Debug builds additionally fail if no pipeline is active, if a required push
+// constant was not supplied, or if a supplied uniform set's format differs from
+// the one the pipeline expects.
+void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *cl = compute_list;
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
+#endif
+
+#ifdef DEBUG_ENABLED
+
+	ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");
+
+	if (cl->validation.pipeline_push_constant_size > 0) {
+		//using push constants, check that they were supplied
+		ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_suppplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+
+#endif
+
+	//Bind descriptor sets
+
+	for (uint32_t i = 0; i < cl->state.set_count; i++) {
+
+		if (cl->state.sets[i].pipeline_expected_format == 0) {
+			continue; //nothing expected by this pipeline
+		}
+#ifdef DEBUG_ENABLED
+		if (cl->state.sets[i].pipeline_expected_format != cl->state.sets[i].uniform_set_format) {
+
+			if (cl->state.sets[i].uniform_set_format == 0) {
+				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
+			} else if (uniform_set_owner.owns(cl->state.sets[i].uniform_set)) {
+				UniformSet *us = uniform_set_owner.getornull(cl->state.sets[i].uniform_set);
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
+			} else {
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
+			}
+		}
+#endif
+		if (!cl->state.sets[i].bound) {
+			//All good, see if this requires re-binding
+			vkCmdBindDescriptorSets(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, cl->state.pipeline_layout, i, 1, &cl->state.sets[i].descriptor_set, 0, NULL);
+			cl->state.sets[i].bound = true;
+		}
+	}
+
+	vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups);
+}
+// Closes the open compute list: every texture recorded in
+// state.textures_to_sampled_layout is transitioned back to
+// SHADER_READ_ONLY_OPTIMAL, then the list object is destroyed.
+void RenderingDeviceVulkan::compute_list_end() {
+	ERR_FAIL_COND(!compute_list);
+
+	for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) {
+
+		VkImageMemoryBarrier image_memory_barrier;
+		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+		image_memory_barrier.pNext = NULL;
+		image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+		image_memory_barrier.oldLayout = E->get()->layout;
+		image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		image_memory_barrier.image = E->get()->image;
+		image_memory_barrier.subresourceRange.aspectMask = E->get()->read_aspect_mask;
+		image_memory_barrier.subresourceRange.baseMipLevel = 0;
+		image_memory_barrier.subresourceRange.levelCount = E->get()->mipmaps;
+		image_memory_barrier.subresourceRange.baseArrayLayer = 0;
+		image_memory_barrier.subresourceRange.layerCount = E->get()->layers;
+
+		vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+
+		E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+	}
+
+	memdelete(compute_list);
+	// clear the pointer so the `!compute_list` guards in the compute_list_* calls
+	// reject any use after the list has ended instead of touching freed memory
+	compute_list = NULL;
+}
+
 #if 0
 void
RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_framebuffer, ID *p_draw_lists, uint32_t p_draw_list_count, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Variant> &p_clear_colors) { @@ -5998,10 +6408,14 @@ void RenderingDeviceVulkan::_free_internal(RID p_id) { UniformSet *uniform_set = uniform_set_owner.getornull(p_id); frames[frame].uniform_sets_to_dispose_of.push_back(*uniform_set); uniform_set_owner.free(p_id); - } else if (pipeline_owner.owns(p_id)) { - RenderPipeline *pipeline = pipeline_owner.getornull(p_id); - frames[frame].pipelines_to_dispose_of.push_back(*pipeline); - pipeline_owner.free(p_id); + } else if (render_pipeline_owner.owns(p_id)) { + RenderPipeline *pipeline = render_pipeline_owner.getornull(p_id); + frames[frame].render_pipelines_to_dispose_of.push_back(*pipeline); + render_pipeline_owner.free(p_id); + } else if (compute_pipeline_owner.owns(p_id)) { + ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_id); + frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline); + compute_pipeline_owner.free(p_id); } else { ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id())); } @@ -6022,6 +6436,10 @@ void RenderingDeviceVulkan::finalize_frame() { ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); } + if (compute_list) { + ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); + } + { //complete the setup buffer (that needs to be processed before anything else) vkEndCommandBuffer(frames[frame].setup_command_buffer); vkEndCommandBuffer(frames[frame].draw_command_buffer); @@ -6032,12 +6450,20 @@ void RenderingDeviceVulkan::finalize_frame() { void RenderingDeviceVulkan::_free_pending_resources(int p_frame) { //free in dependency usage order, so nothing weird happens //pipelines - while (frames[p_frame].pipelines_to_dispose_of.front()) 
{ - RenderPipeline *pipeline = &frames[p_frame].pipelines_to_dispose_of.front()->get(); + while (frames[p_frame].render_pipelines_to_dispose_of.front()) { + RenderPipeline *pipeline = &frames[p_frame].render_pipelines_to_dispose_of.front()->get(); + + vkDestroyPipeline(device, pipeline->pipeline, NULL); + + frames[p_frame].render_pipelines_to_dispose_of.pop_front(); + } + + while (frames[p_frame].compute_pipelines_to_dispose_of.front()) { + ComputePipeline *pipeline = &frames[p_frame].compute_pipelines_to_dispose_of.front()->get(); vkDestroyPipeline(device, pipeline->pipeline, NULL); - frames[p_frame].pipelines_to_dispose_of.pop_front(); + frames[p_frame].compute_pipelines_to_dispose_of.pop_front(); } //uniform sets @@ -6344,6 +6770,8 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context) { draw_list = NULL; draw_list_count = 0; draw_list_split = false; + + compute_list = NULL; } template <class T> @@ -6430,7 +6858,8 @@ void RenderingDeviceVulkan::finalize() { _flush(false); - _free_rids(pipeline_owner, "Pipeline"); + _free_rids(render_pipeline_owner, "Pipeline"); + _free_rids(compute_pipeline_owner, "Compute"); _free_rids(uniform_set_owner, "UniformSet"); _free_rids(texture_buffer_owner, "TextureBuffer"); _free_rids(storage_buffer_owner, "StorageBuffer"); diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index be776bd6e2..2781fc71e2 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -91,6 +91,7 @@ class RenderingDeviceVulkan : public RenderingDevice { ID_TYPE_VERTEX_FORMAT, ID_TYPE_DRAW_LIST, ID_TYPE_SPLIT_DRAW_LIST, + ID_TYPE_COMPUTE_LIST, ID_TYPE_MAX, ID_BASE_SHIFT = 58 //5 bits for ID types }; @@ -138,8 +139,8 @@ class RenderingDeviceVulkan : public RenderingDevice { Vector<DataFormat> allowed_shared_formats; - VkImageLayout bound_layout; //layout used when bound to framebuffer being drawn - VkImageLayout unbound_layout; //layout used otherwise + 
VkImageLayout layout; + uint32_t read_aspect_mask; uint32_t barrier_aspect_mask; bool bound; //bound to framebffer @@ -286,6 +287,7 @@ class RenderingDeviceVulkan : public RenderingDevice { } }; + uint32_t storage_mask; Vector<RID> texture_ids; struct Version { @@ -519,6 +521,7 @@ class RenderingDeviceVulkan : public RenderingDevice { PushConstant push_constant; + bool is_compute = false; int max_output; Vector<Set> sets; Vector<uint32_t> set_formats; @@ -620,6 +623,8 @@ class RenderingDeviceVulkan : public RenderingDevice { VkDescriptorSet descriptor_set; //VkPipelineLayout pipeline_layout; //not owned, inherited from shader Vector<RID> attachable_textures; //used for validation + Vector<Texture *> mutable_sampled_textures; //used for layout change + Vector<Texture *> mutable_storage_textures; //used for layout change }; RID_Owner<UniformSet, true> uniform_set_owner; @@ -660,7 +665,19 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t push_constant_stages; }; - RID_Owner<RenderPipeline, true> pipeline_owner; + RID_Owner<RenderPipeline, true> render_pipeline_owner; + + struct ComputePipeline { + + RID shader; + Vector<uint32_t> set_formats; + VkPipelineLayout pipeline_layout; // not owned, needed for push constants + VkPipeline pipeline; + uint32_t push_constant_size; + uint32_t push_constant_stages; + }; + + RID_Owner<ComputePipeline, true> compute_pipeline_owner; /*******************/ /**** DRAW LIST ****/ @@ -796,6 +813,74 @@ class RenderingDeviceVulkan : public RenderingDevice { Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Color> &p_clear_colors, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents); _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); + /**********************/ + /**** COMPUTE LIST ****/ + /**********************/ + 
+ struct ComputeList { + + VkCommandBuffer command_buffer; //if persistent, this is owned, otherwise it's shared with the ringbuffer + + struct SetState { + uint32_t pipeline_expected_format; + uint32_t uniform_set_format; + VkDescriptorSet descriptor_set; + RID uniform_set; + bool bound; + SetState() { + bound = false; + pipeline_expected_format = 0; + uniform_set_format = 0; + descriptor_set = VK_NULL_HANDLE; + } + }; + + struct State { + Set<Texture *> textures_to_sampled_layout; + + SetState sets[MAX_UNIFORM_SETS]; + uint32_t set_count; + RID pipeline; + RID pipeline_shader; + VkPipelineLayout pipeline_layout; + uint32_t pipeline_push_constant_stages; + + State() { + set_count = 0; + pipeline_layout = VK_NULL_HANDLE; + pipeline_push_constant_stages = 0; + } + } state; +#ifdef DEBUG_ENABLED + + struct Validation { + bool active; //means command buffer was not closes, so you can keep adding things + Vector<uint32_t> set_formats; + Vector<bool> set_bound; + Vector<RID> set_rids; + //last pipeline set values + bool pipeline_active; + RID pipeline_shader; + uint32_t invalid_set_from; + Vector<uint32_t> pipeline_set_formats; + uint32_t pipeline_push_constant_size; + bool pipeline_push_constant_suppplied; + + Validation() { + active = true; + invalid_set_from = 0; + + //pipeline state initalize + pipeline_active = false; + pipeline_push_constant_size = 0; + pipeline_push_constant_suppplied = false; + } + } validation; +#endif + }; + + ComputeList *compute_list; + /**************************/ /**** FRAME MANAGEMENT ****/ /**************************/ @@ -823,7 +908,8 @@ class RenderingDeviceVulkan : public RenderingDevice { List<Shader> shaders_to_dispose_of; List<VkBufferView> buffer_views_to_dispose_of; List<UniformSet> uniform_sets_to_dispose_of; - List<RenderPipeline> pipelines_to_dispose_of; + List<RenderPipeline> render_pipelines_to_dispose_of; + List<ComputePipeline> compute_pipelines_to_dispose_of; VkCommandPool command_pool; VkCommandBuffer 
setup_command_buffer; //used at the begining of every frame for set-up @@ -940,6 +1026,13 @@ public: virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0); virtual bool render_pipeline_is_valid(RID p_pipeline); + /**************************/ + /**** COMPUTE PIPELINE ****/ + /**************************/ + + virtual RID compute_pipeline_create(RID p_shader); + virtual bool compute_pipeline_is_valid(RID p_pipeline); + /****************/ /**** SCREEN ****/ /****************/ @@ -970,6 +1063,17 @@ public: virtual void draw_list_end(); + /***********************/ + /**** COMPUTE LISTS ****/ + /***********************/ + + virtual ComputeListID compute_list_begin(); + virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline); + virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index); + virtual void compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size); + virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + virtual void compute_list_end(); + /**************/ /**** FREE ****/ /**************/ diff --git a/editor/spatial_editor_gizmos.cpp b/editor/spatial_editor_gizmos.cpp index 68c6c12a6d..3541b132da 100644 --- a/editor/spatial_editor_gizmos.cpp +++ b/editor/spatial_editor_gizmos.cpp @@ -170,8 +170,9 @@ void EditorSpatialGizmo::Instance::create_instance(Spatial *p_base, bool p_hidde instance = VS::get_singleton()->instance_create2(mesh->get_rid(), p_base->get_world()->get_scenario()); VS::get_singleton()->instance_attach_object_instance_id(instance, 
p_base->get_instance_id()); - if (skin_reference.is_valid()) + if (skin_reference.is_valid()) { VS::get_singleton()->instance_attach_skeleton(instance, skin_reference->get_skeleton()); + } if (extra_margin) VS::get_singleton()->instance_set_extra_visibility_margin(instance, 1); VS::get_singleton()->instance_geometry_set_cast_shadows_setting(instance, VS::SHADOW_CASTING_SETTING_OFF); diff --git a/gles_builders.py b/gles_builders.py index cde41a8f36..7b2d1ba7d5 100644 --- a/gles_builders.py +++ b/gles_builders.py @@ -509,6 +509,7 @@ class RDHeaderStruct: def __init__(self): self.vertex_lines = [] self.fragment_lines = [] + self.compute_lines = [] self.vertex_included_files = [] self.fragment_included_files = [] @@ -517,6 +518,7 @@ class RDHeaderStruct: self.line_offset = 0 self.vertex_offset = 0 self.fragment_offset = 0 + self.compute_offset = 0 def include_file_in_rd_header(filename, header_data, depth): @@ -539,6 +541,13 @@ def include_file_in_rd_header(filename, header_data, depth): header_data.fragment_offset = header_data.line_offset continue + if line.find("[compute]") != -1: + header_data.reading = "compute" + line = fs.readline() + header_data.line_offset += 1 + header_data.compute_offset = header_data.line_offset + continue + while line.find("#include ") != -1: includeline = line.replace("#include ", "").strip()[1:-1] @@ -553,6 +562,10 @@ def include_file_in_rd_header(filename, header_data, depth): header_data.fragment_included_files += [included_file] if include_file_in_rd_header(included_file, header_data, depth + 1) is None: print("Error in file '" + filename + "': #include " + includeline + "could not be found!") + elif not included_file in header_data.compute_included_files and header_data.reading == "compute": + header_data.compute_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print("Error in file '" + filename + "': #include " + includeline + "could not be found!") line = 
fs.readline() @@ -563,6 +576,8 @@ def include_file_in_rd_header(filename, header_data, depth): header_data.vertex_lines += [line] if header_data.reading == "fragment": header_data.fragment_lines += [line] + if header_data.reading == "compute": + header_data.compute_lines += [line] line = fs.readline() header_data.line_offset += 1 @@ -572,7 +587,7 @@ def include_file_in_rd_header(filename, header_data, depth): return header_data def build_rd_header(filename): - header_data = LegacyGLHeaderStruct() + header_data = RDHeaderStruct() include_file_in_rd_header(filename, header_data, 0) out_file = filename + ".gen.h" @@ -598,24 +613,39 @@ def build_rd_header(filename): fd.write("\t"+out_file_class+"() {\n\n") + if (len(header_data.compute_lines)): - fd.write("\t\tstatic const char _vertex_code[]={\n") - for x in header_data.vertex_lines: - for c in x: - fd.write(str(ord(c)) + ",") + fd.write("\t\tstatic const char _compute_code[]={\n") + for x in header_data.compute_lines: + for c in x: + fd.write(str(ord(c)) + ",") - fd.write(str(ord('\n')) + ",") - fd.write("\t\t0};\n\n") + fd.write(str(ord('\n')) + ",") - fd.write("\t\tstatic const char _fragment_code[]={\n") - for x in header_data.fragment_lines: - for c in x: - fd.write(str(ord(c)) + ",") + fd.write("\t\t0};\n\n") + fd.write("\t\tsetup(nullptr,nullptr,_compute_code,\""+out_file_class+"\");\n") + fd.write("\t}\n") - fd.write(str(ord('\n')) + ",") - fd.write("\t\t0};\n\n") - fd.write("\t\tsetup(_vertex_code,_fragment_code,\""+out_file_class+"\");\n") - fd.write("\t}\n") + else: + + fd.write("\t\tstatic const char _vertex_code[]={\n") + for x in header_data.vertex_lines: + for c in x: + fd.write(str(ord(c)) + ",") + + fd.write(str(ord('\n')) + ",") + fd.write("\t\t0};\n\n") + + fd.write("\t\tstatic const char _fragment_code[]={\n") + for x in header_data.fragment_lines: + for c in x: + fd.write(str(ord(c)) + ",") + + fd.write(str(ord('\n')) + ",") + + fd.write("\t\t0};\n\n") + 
fd.write("\t\tsetup(_vertex_code,_fragment_code,nullptr,\""+out_file_class+"\");\n") + fd.write("\t}\n") fd.write("};\n\n") diff --git a/scene/3d/skeleton.cpp b/scene/3d/skeleton.cpp index 4089e0c23b..3e4cd54664 100644 --- a/scene/3d/skeleton.cpp +++ b/scene/3d/skeleton.cpp @@ -830,7 +830,9 @@ Ref<SkinReference> Skeleton::register_skin(const Ref<Skin> &p_skin) { skin_bindings.insert(skin_ref.operator->()); skin->connect("changed", skin_ref.operator->(), "_skin_changed"); - _make_dirty(); + + _make_dirty(); //skin needs to be updated, so update skeleton + return skin_ref; } diff --git a/servers/visual/rasterizer_rd/rasterizer_scene_rd.cpp b/servers/visual/rasterizer_rd/rasterizer_scene_rd.cpp index 7bb615eda9..888568e21a 100644 --- a/servers/visual/rasterizer_rd/rasterizer_scene_rd.cpp +++ b/servers/visual/rasterizer_rd/rasterizer_scene_rd.cpp @@ -1469,6 +1469,15 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) { sky_ggx_samples_realtime = GLOBAL_GET("rendering/quality/reflections/ggx_samples_realtime"); sky_use_cubemap_array = GLOBAL_GET("rendering/quality/reflections/texture_array_reflections"); // sky_use_cubemap_array = false; + + { + String defines = ""; + Vector<String> versions; + versions.push_back(""); + giprobe_lighting_shader.initialize(versions, defines); + giprobe_lighting_shader_version = giprobe_lighting_shader.version_create(); + giprobe_lighting_shader_version_shader = giprobe_lighting_shader.version_get_shader(giprobe_lighting_shader_version, 0); + } } RasterizerSceneRD::~RasterizerSceneRD() { diff --git a/servers/visual/rasterizer_rd/rasterizer_scene_rd.h b/servers/visual/rasterizer_rd/rasterizer_scene_rd.h index a1897cabe1..97e1d08d90 100644 --- a/servers/visual/rasterizer_rd/rasterizer_scene_rd.h +++ b/servers/visual/rasterizer_rd/rasterizer_scene_rd.h @@ -4,6 +4,7 @@ #include "core/rid_owner.h" #include "servers/visual/rasterizer.h" #include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h" +#include 
"servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl.gen.h" #include "servers/visual/rendering_device.h" class RasterizerSceneRD : public RasterizerScene { @@ -108,6 +109,12 @@ private: mutable RID_Owner<ReflectionProbeInstance> reflection_probe_instance_owner; + /* GIPROBE INSTANCE */ + + GiprobeLightingShaderRD giprobe_lighting_shader; + RID giprobe_lighting_shader_version; + RID giprobe_lighting_shader_version_shader; + /* SHADOW ATLAS */ struct ShadowAtlas { diff --git a/servers/visual/rasterizer_rd/rasterizer_storage_rd.h b/servers/visual/rasterizer_rd/rasterizer_storage_rd.h index 397e3c1b95..6cdc21c2f0 100644 --- a/servers/visual/rasterizer_rd/rasterizer_storage_rd.h +++ b/servers/visual/rasterizer_rd/rasterizer_storage_rd.h @@ -778,6 +778,8 @@ public: _FORCE_INLINE_ RID skeleton_get_3d_uniform_set(RID p_skeleton, RID p_shader, uint32_t p_set) const { Skeleton *skeleton = skeleton_owner.getornull(p_skeleton); + ERR_FAIL_COND_V(!skeleton, RID()); + ERR_FAIL_COND_V(skeleton->size == 0, RID()); if (skeleton->use_2d) { return RID(); } diff --git a/servers/visual/rasterizer_rd/shader_rd.cpp b/servers/visual/rasterizer_rd/shader_rd.cpp index 58596f6a72..98e0c99c2e 100644 --- a/servers/visual/rasterizer_rd/shader_rd.cpp +++ b/servers/visual/rasterizer_rd/shader_rd.cpp @@ -33,11 +33,11 @@ #include "rasterizer_rd.h" #include "servers/visual/rendering_device.h" -void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) { +void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name) { name = p_name; //split vertex and shader code (thank you, shader compiler programmers from you know what company). 
- { + if (p_vertex_code) { String defines_tag = "\nVERSION_DEFINES"; String globals_tag = "\nVERTEX_SHADER_GLOBALS"; String material_tag = "\nMATERIAL_UNIFORMS"; @@ -79,7 +79,7 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con } } - { + if (p_fragment_code) { String defines_tag = "\nVERSION_DEFINES"; String globals_tag = "\nFRAGMENT_SHADER_GLOBALS"; String material_tag = "\nMATERIAL_UNIFORMS"; @@ -135,6 +135,50 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con } } } + + if (p_compute_code) { + is_compute = true; + + String defines_tag = "\nVERSION_DEFINES"; + String globals_tag = "\nCOMPUTE_SHADER_GLOBALS"; + String material_tag = "\nMATERIAL_UNIFORMS"; + String code_tag = "\nCOMPUTE_SHADER_CODE"; + String code = p_compute_code; + + int cpos = code.find(defines_tag); + if (cpos != -1) { + compute_codev = code.substr(0, cpos).ascii(); + code = code.substr(cpos + defines_tag.length(), code.length()); + } + + cpos = code.find(material_tag); + + if (cpos == -1) { + compute_code0 = code.ascii(); + } else { + compute_code0 = code.substr(0, cpos).ascii(); + code = code.substr(cpos + material_tag.length(), code.length()); + + cpos = code.find(globals_tag); + + if (cpos == -1) { + compute_code1 = code.ascii(); + } else { + + compute_code1 = code.substr(0, cpos).ascii(); + String code2 = code.substr(cpos + globals_tag.length(), code.length()); + + cpos = code2.find(code_tag); + if (cpos == -1) { + compute_code2 = code2.ascii(); + } else { + + compute_code2 = code2.substr(0, cpos).ascii(); + compute_code3 = code2.substr(cpos + code_tag.length(), code2.length()).ascii(); + } + } + } + } } RID ShaderRD::version_create() { @@ -171,7 +215,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; bool build_ok = true; - { + if (!is_compute) { //vertex stage StringBuilder builder; @@ -211,7 +255,7 @@ void 
ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { } } - if (build_ok) { + if (!is_compute && build_ok) { //fragment stage current_stage = RD::SHADER_STAGE_FRAGMENT; @@ -256,9 +300,50 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { } } + if (is_compute) { + //compute stage + current_stage = RD::SHADER_STAGE_COMPUTE; + + StringBuilder builder; + + builder.append(compute_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(general_defines.get_data()); + builder.append(variant_defines[p_variant].get_data()); + + for (int j = 0; j < p_version->custom_defines.size(); j++) { + builder.append(p_version->custom_defines[j].get_data()); + } + + builder.append(compute_code0.get_data()); //first part of compute + + builder.append(p_version->uniforms.get_data()); //uniforms (same for compute and fragment) + + builder.append(compute_code1.get_data()); //second part of compute + + builder.append(p_version->compute_globals.get_data()); // compute globals + + builder.append(compute_code2.get_data()); //third part of compute + + builder.append(p_version->compute_code.get_data()); // code + + builder.append(compute_code3.get_data()); //fourth of compute + + current_source = builder.as_string(); + RD::ShaderStageData stage; + stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_COMPUTE, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spir_v.size() == 0) { + build_ok = false; + } else { + + stage.shader_stage = RD::SHADER_STAGE_COMPUTE; + stages.push_back(stage); + } + } + if (!build_ok) { variant_set_mutex.lock(); //properly print the errors - ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ")."); + ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? 
"Compute " : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ")."); ERR_PRINT(error); #ifdef DEBUG_ENABLED @@ -319,6 +404,8 @@ void ShaderRD::_compile_version(Version *p_version) { void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines) { + ERR_FAIL_COND(is_compute); + Version *version = version_owner.getornull(p_version); ERR_FAIL_COND(!version); version->vertex_globals = p_vertex_globals.utf8(); @@ -340,6 +427,28 @@ void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const S } } +void ShaderRD::version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines) { + + ERR_FAIL_COND(!is_compute); + + Version *version = version_owner.getornull(p_version); + ERR_FAIL_COND(!version); + version->compute_globals = p_compute_globals.utf8(); + version->compute_code = p_compute_code.utf8(); + version->uniforms = p_uniforms.utf8(); + + version->custom_defines.clear(); + for (int i = 0; i < p_custom_defines.size(); i++) { + version->custom_defines.push_back(p_custom_defines[i].utf8()); + } + + version->dirty = true; + if (version->initialize_needed) { + _compile_version(version); + version->initialize_needed = false; + } +} + bool ShaderRD::version_is_valid(RID p_version) { Version *version = version_owner.getornull(p_version); ERR_FAIL_COND_V(!version, false); diff --git a/servers/visual/rasterizer_rd/shader_rd.h b/servers/visual/rasterizer_rd/shader_rd.h index 81169343d6..4d1e9576ce 100644 --- a/servers/visual/rasterizer_rd/shader_rd.h +++ b/servers/visual/rasterizer_rd/shader_rd.h @@ -55,6 +55,8 @@ class ShaderRD { CharString 
uniforms; CharString vertex_globals; CharString vertex_code; + CharString compute_globals; + CharString compute_code; CharString fragment_light; CharString fragment_globals; CharString fragment_code; @@ -89,16 +91,25 @@ class ShaderRD { CharString vertex_code2; CharString vertex_code3; + bool is_compute = false; + + CharString compute_codev; //for version and extensions + CharString compute_code0; + CharString compute_code1; + CharString compute_code2; + CharString compute_code3; + const char *name; protected: ShaderRD() {} - void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name); + void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); public: RID version_create(); void version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines); + void version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines); _FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) { ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID()); diff --git a/servers/visual/rasterizer_rd/shaders/SCsub b/servers/visual/rasterizer_rd/shaders/SCsub index b1fa9cd3f4..660523e29f 100644 --- a/servers/visual/rasterizer_rd/shaders/SCsub +++ b/servers/visual/rasterizer_rd/shaders/SCsub @@ -11,4 +11,5 @@ if 'RD_GLSL' in env['BUILDERS']: env.RD_GLSL('sky.glsl'); env.RD_GLSL('tonemap.glsl'); env.RD_GLSL('copy.glsl'); + env.RD_GLSL('giprobe_lighting.glsl'); diff --git a/servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl b/servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl new file mode 100644 index 0000000000..cec25f86f9 --- /dev/null +++ 
b/servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl
@@ -0,0 +1,241 @@
+[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+#define NO_CHILDREN 0xFFFFFFFF
+#define GREY_VEC vec3(0.33333,0.33333,0.33333)
+
+// One octree node: indices of its 8 children, NO_CHILDREN where a child is missing.
+struct CellPosition {
+	uint children[8];
+};
+
+layout(set=0,binding=1,std140) buffer CellPositions {
+	CellPosition data[];
+} cell_positions;
+
+struct CellMaterial {
+	uint position; // xyz 10 bits
+	uint albedo; //rgb albedo
+	uint emission; //rgb normalized with e as multiplier
+	uint normal; //RGB normal encoded
+};
+
+layout(set=0,binding=2,std140) buffer CellMaterials {
+	CellMaterial data[];
+} cell_materials;
+
+#define LIGHT_TYPE_DIRECTIONAL 0
+#define LIGHT_TYPE_OMNI 1
+#define LIGHT_TYPE_SPOT 2
+
+struct Light {
+	uint type;
+	float energy;
+	float radius;
+	float attenuation;
+
+	vec3 color;
+	float spot_angle_radians;
+
+	float advance;
+	float max_length;
+	uint pad0;
+	uint pad2;
+
+	vec3 position;
+	float spot_attenuation;
+
+	vec3 direction;
+	bool visible;
+
+	vec4 clip_planes[3];
+};
+
+layout(set=0,binding=3,std140) buffer Lights {
+	Light data[];
+} lights;
+
+layout(set=0,binding=4,std140) uniform Params {
+	vec3 limits;
+	float max_length;
+	uint size;
+	uint stack_size;
+	uint light_count;
+	float emission_scale;
+} params;
+
+layout (rgba8,set=0,binding=5) uniform restrict writeonly image3D color_tex;
+
+// Steps from `from` along `direction`, `distance_adv` at a time, until `distance` is used up.
+// Each step descends the octree at the current position; returns the first cell reached by a
+// full descent, or NO_CHILDREN when the ray leaves the grid or never reaches one.
+uint raymarch(float distance,float distance_adv,vec3 from,vec3 direction) {
+	while (distance > -distance_adv) { //use this to avoid precision errors
+		uint cell = 0;
+
+		ivec3 pos = ivec3(from);
+		ivec3 ofs = ivec3(0);
+		ivec3 half_size = ivec3(params.size) / 2;
+		if (any(lessThan(pos,ivec3(0))) || any(greaterThanEqual(pos,ivec3(params.size)))) {
+			return NO_CHILDREN; //outside range
+		}
+
+		// "i + 1 <" rather than "i < stack_size - 1": the latter wraps around when stack_size is 0.
+		for (uint i = 0; i + 1 < params.stack_size; i++) {
+			bvec3 greater = greaterThanEqual(pos,ofs+half_size);
+
+			ofs += mix(ivec3(0),half_size,greater);
+
+			uint child = 0; //wonder if this can be done faster
+			if (greater.x) {
+				child|=1;
+			}
+			if (greater.y) {
+				child|=2;
+			}
+			if (greater.z) {
+				child|=4;
+			}
+
+			cell = cell_positions.data[cell].children[child];
+			if (cell == NO_CHILDREN)
+				break;
+
+			half_size >>= ivec3(1);
+		}
+
+		if ( cell != NO_CHILDREN) {
+			return cell; //found cell!
+		}
+
+		from += direction * distance_adv;
+		distance -= distance_adv;
+	}
+
+	return NO_CHILDREN;
+}
+
+// Writes where light `light` reaches `pos` from and how much of it arrives.
+// Returns false when `pos` lies outside the light's radius or spot cone.
+bool compute_light_vector(uint light,uint cell, vec3 pos,out float attenuation, out vec3 light_pos) {
+	if (lights.data[light].type==LIGHT_TYPE_DIRECTIONAL) {
+		light_pos = pos - lights.data[light].direction * params.max_length;
+		attenuation = 1.0;
+	} else {
+		light_pos = lights.data[light].position;
+		float distance = length(pos - light_pos);
+		if (distance >= lights.data[light].radius) {
+			return false;
+		}
+
+		// Falloff runs from 1 at the light to ~0 at the radius. The previous base,
+		// distance / radius, grew with distance and so brightened far-away cells.
+		attenuation = pow( clamp( 1.0 - distance / lights.data[light].radius, 0.0001, 1.0 ), lights.data[light].attenuation );
+
+		if (lights.data[light].type==LIGHT_TYPE_SPOT) {
+			vec3 rel = normalize(pos - light_pos);
+			// clamp: rounding can push the dot product just past 1.0, outside acos()'s domain
+			float angle = acos(clamp(dot(rel,lights.data[light].direction),-1.0,1.0));
+			if (angle > lights.data[light].spot_angle_radians) {
+				return false;
+			}
+
+			float d = clamp(angle / lights.data[light].spot_angle_radians, 0.0, 1.0);
+			attenuation *= pow(1.0 - d, lights.data[light].spot_attenuation);
+		}
+	}
+
+	return true;
+}
+
+// One invocation per cell: adds up every light that reaches the cell unoccluded, plus the cell's
+// emission, and stores the sum in color_tex (MODE_ANISOTROPIC has no store yet).
+void main() {
+	uint cell_index = gl_GlobalInvocationID.x;
+	// NOTE(review): no bounds check here; presumably the dispatch covers exactly the cell count.
+	uvec3 posu = uvec3(cell_materials.data[cell_index].position&0x3FF,(cell_materials.data[cell_index].position>>10)&0x3FF,cell_materials.data[cell_index].position>>20);
+	vec3 pos = vec3(posu);
+
+	vec3 emission = vec3(ivec3(cell_materials.data[cell_index].emission&0x3FF,(cell_materials.data[cell_index].emission>>10)&0x7FF,cell_materials.data[cell_index].emission>>21)) * params.emission_scale;
+	vec4 albedo = unpackUnorm4x8(cell_materials.data[cell_index].albedo);
+	vec4 normal = unpackSnorm4x8(cell_materials.data[cell_index].normal); //w >0.5 means, all directions
+
+#ifdef MODE_ANISOTROPIC
+	vec3 accum[6]=vec3[](vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0));
+	const vec3 accum_dirs[6]=vec3[](vec3(1.0,0.0,0.0),vec3(-1.0,0.0,0.0),vec3(0.0,1.0,0.0),vec3(0.0,-1.0,0.0),vec3(0.0,0.0,1.0),vec3(0.0,0.0,-1.0));
+#else
+	vec3 accum = vec3(0);
+#endif
+
+	for(uint i=0;i<params.light_count;i++) {
+		float attenuation;
+		vec3 light_pos;
+
+		if (!compute_light_vector(i,cell_index,pos,attenuation,light_pos)) {
+			continue;
+		}
+
+		float distance_adv = lights.data[i].advance;
+
+		vec3 light_dir = pos - light_pos;
+		float distance = length(light_dir);
+
+		light_dir=normalize(light_dir);
+
+		distance += distance_adv - mod(distance, distance_adv); //make it reach the center of the box always
+
+		vec3 from = pos - light_dir * distance; //approximate
+
+		if (normal.w < 0.5 && dot(normal.xyz,light_dir)>=0.0) {
+			continue; //not facing the light
+		}
+
+		// March along light_dir, the direction `from` was derived with. lights.data[i].direction
+		// only matches it for directional lights, so omni and spot rays were aimed wrongly.
+		uint result = raymarch(distance,distance_adv,from,light_dir);
+
+		if (result != cell_index) {
+			continue; //was occluded
+		}
+
+		vec3 light = lights.data[i].color * albedo.rgb * attenuation;
+
+#ifdef MODE_ANISOTROPIC
+		for(uint j=0;j<6;j++) {
+			accum[j]+=max(0.0,dot(accum_dirs[j],-light_dir))*light;
+		}
+#else
+		if (normal.w < 0.5) {
+			accum+=max(0.0,dot(normal.xyz,-light_dir))*light;
+		} else {
+			//all directions
+			accum+=light;
+		}
+#endif
+	}
+
+	// Emission is added once per cell. Inside the loop it was scaled by the number of lights
+	// that reach the cell and lost entirely when none did.
+#ifdef MODE_ANISOTROPIC
+	for(uint j=0;j<6;j++) {
+		accum[j]+=emission;
+	}
+
+	vec3 accum_total = accum[0]+accum[1]+accum[2]+accum[3]+accum[4]+accum[5];
+	float accum_total_energy = max(dot(accum_total,GREY_VEC),0.00001);
+	vec3 iso_positive = vec3(dot(accum[0],GREY_VEC),dot(accum[2],GREY_VEC),dot(accum[4],GREY_VEC))/vec3(accum_total_energy);
+	vec3 iso_negative = vec3(dot(accum[1],GREY_VEC),dot(accum[3],GREY_VEC),dot(accum[5],GREY_VEC))/vec3(accum_total_energy);
+
+	//store in 3D textures, total color, and isotropic magnitudes
+#else
+	accum+=emission;
+
+	//store in 3D texture pos, accum
+	imageStore(color_tex,ivec3(posu),vec4(accum,albedo.a));
+#endif
+}
diff --git a/servers/visual/rendering_device.h b/servers/visual/rendering_device.h
index 859a9e798c..d0afb3f13b 100644
--- a/servers/visual/rendering_device.h
+++ b/servers/visual/rendering_device.h
@@ -882,6 +882,13 @@ public:
 	virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0) = 0;
 	virtual bool render_pipeline_is_valid(RID p_pipeline) = 0;
 
+	/**************************/
+	/**** COMPUTE PIPELINE ****/
+	/**************************/
+
+	virtual RID compute_pipeline_create(RID p_shader) = 0;
+	virtual bool compute_pipeline_is_valid(RID p_pipeline) = 0;
+
 	/****************/
 	/**** SCREEN ****/
 	/****************/
@@ -930,6 +937,19 @@ public:
 
 	virtual void draw_list_end() = 0;
 
+	/***********************/
+	/**** COMPUTE LISTS ****/
+	/***********************/
+
+	typedef int64_t ComputeListID;
+
+	virtual ComputeListID compute_list_begin() = 0;
+	virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) = 0;
+	virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) = 0;
+	virtual void compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size) = 0;
+	virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0;
+	virtual void compute_list_end() = 0;
+
 	/***************/
 	/**** FREE! ****/
 	/***************/
|