diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gles3/rasterizer_canvas_gles3.cpp | 40 | ||||
-rw-r--r-- | drivers/gles3/rasterizer_scene_gles3.cpp | 5 | ||||
-rw-r--r-- | drivers/gles3/shaders/SCsub | 1 | ||||
-rw-r--r-- | drivers/gles3/shaders/canvas.glsl | 51 | ||||
-rw-r--r-- | drivers/gles3/shaders/scene.glsl | 3 | ||||
-rw-r--r-- | drivers/gles3/shaders/skeleton.glsl | 269 | ||||
-rw-r--r-- | drivers/gles3/shaders/stdlib_inc.glsl | 3 | ||||
-rw-r--r-- | drivers/gles3/storage/mesh_storage.cpp | 581 | ||||
-rw-r--r-- | drivers/gles3/storage/mesh_storage.h | 38 | ||||
-rw-r--r-- | drivers/gles3/storage/utilities.cpp | 2 | ||||
-rw-r--r-- | drivers/unix/os_unix.cpp | 2 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 158 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.h | 26 | ||||
-rw-r--r-- | drivers/vulkan/vulkan_context.cpp | 605 | ||||
-rw-r--r-- | drivers/vulkan/vulkan_context.h | 32 |
15 files changed, 1314 insertions, 502 deletions
diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 1fd08ffe01..e5d4077393 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -106,6 +106,7 @@ void RasterizerCanvasGLES3::_update_transform_to_mat4(const Transform3D &p_trans void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_light_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) { GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); + GLES3::MeshStorage *mesh_storage = GLES3::MeshStorage::get_singleton(); Transform2D canvas_transform_inverse = p_canvas_transform.affine_inverse(); @@ -389,6 +390,7 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ Rect2 back_buffer_rect; bool backbuffer_copy = false; bool backbuffer_gen_mipmaps = false; + bool update_skeletons = false; Item *ci = p_item_list; Item *canvas_group_owner = nullptr; @@ -430,8 +432,27 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ } } + if (ci->skeleton.is_valid()) { + const Item::Command *c = ci->commands; + + while (c) { + if (c->type == Item::Command::TYPE_MESH) { + const Item::CommandMesh *cm = static_cast<const Item::CommandMesh *>(c); + if (cm->mesh_instance.is_valid()) { + mesh_storage->mesh_instance_check_for_update(cm->mesh_instance); + update_skeletons = true; + } + } + c = c->next; + } + } + if (ci->canvas_group_owner != nullptr) { if (canvas_group_owner == nullptr) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } // Canvas group begins here, render until before this item _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, false); item_count = 0; @@ -460,6 +481,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ } if (ci == canvas_group_owner) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, true); item_count = 0; @@ -473,6 +498,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ } if (backbuffer_copy) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } //render anything pending, including clearing if no items _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, false); @@ -497,6 +526,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ items[item_count++] = ci; if (!ci->next || item_count == MAX_RENDER_ITEMS - 1) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, false); //then reset item_count = 0; @@ -1311,10 +1344,6 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { instance_uses_custom_data = true; } - if (instance_buffer == 0) { - break; - } - ERR_FAIL_COND(mesh.is_null()); uint32_t surf_count = mesh_storage->mesh_get_surface_count(mesh); @@ -1344,6 +1373,9 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { } if (instance_count > 1) { + if (instance_buffer == 0) { + break; + } // Bind instance buffers. glBindBuffer(GL_ARRAY_BUFFER, instance_buffer); glEnableVertexAttribArray(1); diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 026ec85e6b..8250140c3f 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -420,6 +420,11 @@ void RasterizerSceneGLES3::_geometry_instance_update(RenderGeometryInstance *p_g } } else if (ginstance->data->base_type == RS::INSTANCE_MESH) { + if (mesh_storage->skeleton_is_valid(ginstance->data->skeleton)) { + if (ginstance->data->dirty_dependencies) { + mesh_storage->skeleton_update_dependency(ginstance->data->skeleton, &ginstance->data->dependency_tracker); + } + } } ginstance->store_transform_cache = store_transform; diff --git a/drivers/gles3/shaders/SCsub b/drivers/gles3/shaders/SCsub index 2686b1aa48..34713e7e29 100644 --- a/drivers/gles3/shaders/SCsub +++ b/drivers/gles3/shaders/SCsub @@ -21,3 +21,4 @@ if "GLES3_GLSL" in env["BUILDERS"]: env.GLES3_GLSL("canvas_sdf.glsl") env.GLES3_GLSL("particles.glsl") env.GLES3_GLSL("particles_copy.glsl") + env.GLES3_GLSL("skeleton.glsl") diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index ca806304c5..60139de472 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -19,9 +19,6 @@ layout(location = 0) in vec2 vertex_attrib; layout(location = 3) in vec4 color_attrib; layout(location = 4) in vec2 uv_attrib; -layout(location = 10) in uvec4 bone_attrib; -layout(location = 11) in vec4 weight_attrib; - #ifdef USE_INSTANCING layout(location = 1) in highp vec4 instance_xform0; @@ -81,8 +78,6 @@ void main() { uv = draw_data[draw_data_instance].uv_c; color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_c_rg), unpackHalf2x16(draw_data[draw_data_instance].color_c_ba)); } - uvec4 bones = uvec4(0, 0, 0, 0); - vec4 bone_weights = vec4(0.0); #elif defined(USE_ATTRIBUTES) draw_data_instance = gl_InstanceID; @@ -93,9 +88,6 @@ void main() { vec4 color = color_attrib * draw_data[draw_data_instance].modulation; vec2 uv = uv_attrib; - uvec4 bones = bone_attrib; - vec4 bone_weights = weight_attrib; - #ifdef USE_INSTANCING vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y)); color *= instance_color; @@ -110,7 +102,6 @@ void main() { vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy); vec4 color = draw_data[draw_data_instance].modulation; vec2 vertex = draw_data[draw_data_instance].dst_rect.xy + abs(draw_data[draw_data_instance].dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data[draw_data_instance].src_rect.zw, vec2(0.0, 0.0))); - uvec4 bones = uvec4(0, 0, 0, 0); #endif @@ -153,48 +144,6 @@ void main() { uv += 1e-5; } -#ifdef USE_ATTRIBUTES -#if 0 - if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_SKELETON) && bone_weights != vec4(0.0)) { //must be a valid bone - //skeleton transform - ivec4 bone_indicesi = ivec4(bone_indices); - - uvec2 tex_ofs = bone_indicesi.x * 2; - - mat2x4 m; - m = mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.x; - - tex_ofs = bone_indicesi.y * 2; - - m += mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.y; - - tex_ofs = bone_indicesi.z * 2; - - m += mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.z; - - tex_ofs = bone_indicesi.w * 2; - - m += mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.w; - - mat4 bone_matrix = skeleton_data.skeleton_transform * transpose(mat4(m[0], m[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))) * skeleton_data.skeleton_transform_inverse; - - //outvec = bone_matrix * outvec; - } -#endif -#endif - vertex = (canvas_transform * vec4(vertex, 0.0, 1.0)).xy; vertex_interp = vertex; diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index f14ed24965..04dba602dd 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -102,7 +102,7 @@ vec3 oct_to_vec3(vec2 e) { vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); float t = max(-v.z, 0.0); v.xy += t * -sign(v.xy); - return v; + return normalize(v); } #ifdef USE_INSTANCING @@ -920,6 +920,7 @@ void main() { #else vec3 view = -normalize(vertex_interp); #endif + highp mat4 model_matrix = world_transform; vec3 albedo = vec3(1.0); vec3 backlight = vec3(0.0); vec4 transmittance_color = vec4(0.0, 0.0, 0.0, 1.0); diff --git a/drivers/gles3/shaders/skeleton.glsl b/drivers/gles3/shaders/skeleton.glsl new file mode 100644 index 0000000000..a1e3c098f4 --- /dev/null +++ b/drivers/gles3/shaders/skeleton.glsl @@ -0,0 +1,269 @@ +/* clang-format off */ +#[modes] + +mode_base_pass = +mode_blend_pass = #define MODE_BLEND_PASS + +#[specializations] + +MODE_2D = true +USE_BLEND_SHAPES = false +USE_SKELETON = false +USE_NORMAL = false +USE_TANGENT = false +FINAL_PASS = false +USE_EIGHT_WEIGHTS = false + +#[vertex] + +#include "stdlib_inc.glsl" + +#ifdef MODE_2D +#define VFORMAT vec2 +#else +#define VFORMAT vec3 +#endif + +#ifdef FINAL_PASS +#define OFORMAT vec2 +#else +#define OFORMAT uvec2 +#endif + +// These come from the source mesh and the output from previous passes. +layout(location = 0) in highp VFORMAT in_vertex; +#ifdef MODE_BLEND_PASS +#ifdef USE_NORMAL +layout(location = 1) in highp uvec2 in_normal; +#endif +#ifdef USE_TANGENT +layout(location = 2) in highp uvec2 in_tangent; +#endif +#else // MODE_BLEND_PASS +#ifdef USE_NORMAL +layout(location = 1) in highp vec2 in_normal; +#endif +#ifdef USE_TANGENT +layout(location = 2) in highp vec2 in_tangent; +#endif +#endif // MODE_BLEND_PASS + +#ifdef USE_SKELETON +#ifdef USE_EIGHT_WEIGHTS +layout(location = 10) in highp uvec4 in_bone_attrib; +layout(location = 11) in highp uvec4 in_bone_attrib2; +layout(location = 12) in mediump vec4 in_weight_attrib; +layout(location = 13) in mediump vec4 in_weight_attrib2; +#else +layout(location = 10) in highp uvec4 in_bone_attrib; +layout(location = 11) in mediump vec4 in_weight_attrib; +#endif + +uniform mediump sampler2D skeleton_texture; // texunit:0 +#endif + +/* clang-format on */ +#ifdef MODE_BLEND_PASS +layout(location = 3) in highp VFORMAT blend_vertex; +#ifdef USE_NORMAL +layout(location = 4) in highp vec2 blend_normal; +#endif +#ifdef USE_TANGENT +layout(location = 5) in highp vec2 blend_tangent; +#endif +#endif // MODE_BLEND_PASS + +out highp VFORMAT out_vertex; //tfb: + +#ifdef USE_NORMAL +flat out highp OFORMAT out_normal; //tfb:USE_NORMAL +#endif +#ifdef USE_TANGENT +flat out highp OFORMAT out_tangent; //tfb:USE_TANGENT +#endif + +#ifdef USE_BLEND_SHAPES +uniform highp float blend_weight; +uniform lowp float blend_shape_count; +#endif + +vec2 signNotZero(vec2 v) { + return mix(vec2(-1.0), vec2(1.0), greaterThanEqual(v.xy, vec2(0.0))); +} + +vec3 oct_to_vec3(vec2 oct) { + oct = oct * 2.0 - 1.0; + vec3 v = vec3(oct.xy, 1.0 - abs(oct.x) - abs(oct.y)); + if (v.z < 0.0) { + v.xy = (1.0 - abs(v.yx)) * signNotZero(v.xy); + } + return normalize(v); +} + +vec2 vec3_to_oct(vec3 e) { + e /= abs(e.x) + abs(e.y) + abs(e.z); + vec2 oct = e.z >= 0.0f ? e.xy : (vec2(1.0f) - abs(e.yx)) * signNotZero(e.xy); + return oct * 0.5f + 0.5f; +} + +vec4 oct_to_tang(vec2 oct_sign_encoded) { + // Binormal sign encoded in y component + vec2 oct = vec2(oct_sign_encoded.x, abs(oct_sign_encoded.y) * 2.0 - 1.0); + return vec4(oct_to_vec3(oct), sign(oct_sign_encoded.y)); +} + +vec2 tang_to_oct(vec4 base) { + vec2 oct = vec3_to_oct(base.xyz); + // Encode binormal sign in y component + oct.y = oct.y * 0.5f + 0.5f; + oct.y = base.w >= 0.0f ? oct.y : 1.0 - oct.y; + return oct; +} + +// Our original input for normals and tangents is 2 16-bit floats. +// Transform Feedback has to write out 32-bits per channel. +// Octahedral compression requires normalized vectors, but we need to store +// non-normalized vectors until the very end. +// Therefore, we will compress our normals into 16 bits using signed-normalized +// fixed point precision. This works well, because we know that each normal +// is no larger than |1| so we can normalize by dividing by the number of blend +// shapes. +uvec2 vec4_to_vec2(vec4 p_vec) { + return uvec2(packSnorm2x16(p_vec.xy), packSnorm2x16(p_vec.zw)); +} + +vec4 vec2_to_vec4(uvec2 p_vec) { + return vec4(unpackSnorm2x16(p_vec.x), unpackSnorm2x16(p_vec.y)); +} + +void main() { +#ifdef MODE_2D + out_vertex = in_vertex; + +#ifdef USE_BLEND_SHAPES +#ifdef MODE_BLEND_PASS + out_vertex = in_vertex + blend_vertex * blend_weight; +#else + out_vertex = in_vertex * blend_weight; +#endif +#ifdef FINAL_PASS + out_vertex = normalize(out_vertex); +#endif +#endif // USE_BLEND_SHAPES + +#ifdef USE_SKELETON + +#define TEX(m) texelFetch(skeleton_texture, ivec2(m % 256u, m / 256u), 0) +#define GET_BONE_MATRIX(a, b, w) mat2x4(TEX(a), TEX(b)) * w + + uvec4 bones = in_bone_attrib * uvec4(2u); + uvec4 bones_a = bones + uvec4(1u); + + highp mat2x4 m = GET_BONE_MATRIX(bones.x, bones_a.x, in_weight_attrib.x); + m += GET_BONE_MATRIX(bones.y, bones_a.y, in_weight_attrib.y); + m += GET_BONE_MATRIX(bones.z, bones_a.z, in_weight_attrib.z); + m += GET_BONE_MATRIX(bones.w, bones_a.w, in_weight_attrib.w); + + mat4 bone_matrix = mat4(m[0], m[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + + //reverse order because its transposed + out_vertex = (vec4(out_vertex, 0.0, 1.0) * bone_matrix).xy; +#endif // USE_SKELETON + +#else // MODE_2D + +#ifdef USE_BLEND_SHAPES +#ifdef MODE_BLEND_PASS + out_vertex = in_vertex + blend_vertex * blend_weight; + +#ifdef USE_NORMAL + vec3 normal = vec2_to_vec4(in_normal).xyz * blend_shape_count; + vec3 normal_blend = oct_to_vec3(blend_normal) * blend_weight; +#ifdef FINAL_PASS + out_normal = vec3_to_oct(normalize(normal + normal_blend)); +#else + out_normal = vec4_to_vec2(vec4(normal + normal_blend, 0.0) / blend_shape_count); +#endif +#endif // USE_NORMAL + +#ifdef USE_TANGENT + vec4 tangent = vec2_to_vec4(in_tangent) * blend_shape_count; + vec4 tangent_blend = oct_to_tang(blend_tangent) * blend_weight; +#ifdef FINAL_PASS + out_tangent = tang_to_oct(vec4(normalize(tangent.xyz + tangent_blend.xyz), tangent.w)); +#else + out_tangent = vec4_to_vec2(vec4((tangent.xyz + tangent_blend.xyz) / blend_shape_count, tangent.w)); +#endif +#endif // USE_TANGENT + +#else // MODE_BLEND_PASS + out_vertex = in_vertex * blend_weight; + +#ifdef USE_NORMAL + vec3 normal = oct_to_vec3(in_normal); + out_normal = vec4_to_vec2(vec4(normal * blend_weight / blend_shape_count, 0.0)); +#endif +#ifdef USE_TANGENT + vec4 tangent = oct_to_tang(in_tangent); + out_tangent = vec4_to_vec2(vec4(tangent.rgb * blend_weight / blend_shape_count, tangent.w)); +#endif +#endif // MODE_BLEND_PASS +#else // USE_BLEND_SHAPES + + // Make attributes available to the skeleton shader if not written by blend shapes. + out_vertex = in_vertex; +#ifdef USE_NORMAL + out_normal = in_normal; +#endif +#ifdef USE_TANGENT + out_tangent = in_tangent; +#endif +#endif // USE_BLEND_SHAPES + +#ifdef USE_SKELETON + +#define TEX(m) texelFetch(skeleton_texture, ivec2(m % 256u, m / 256u), 0) +#define GET_BONE_MATRIX(a, b, c, w) mat4(TEX(a), TEX(b), TEX(c), vec4(0.0, 0.0, 0.0, 1.0)) * w + + uvec4 bones = in_bone_attrib * uvec4(3); + uvec4 bones_a = bones + uvec4(1); + uvec4 bones_b = bones + uvec4(2); + + highp mat4 m; + m = GET_BONE_MATRIX(bones.x, bones_a.x, bones_b.x, in_weight_attrib.x); + m += GET_BONE_MATRIX(bones.y, bones_a.y, bones_b.y, in_weight_attrib.y); + m += GET_BONE_MATRIX(bones.z, bones_a.z, bones_b.z, in_weight_attrib.z); + m += GET_BONE_MATRIX(bones.w, bones_a.w, bones_b.w, in_weight_attrib.w); + +#ifdef USE_EIGHT_WEIGHTS + bones = in_bone_attrib2 * uvec4(3); + bones_a = bones + uvec4(1); + bones_b = bones + uvec4(2); + + m += GET_BONE_MATRIX(bones.x, bones_a.x, bones_b.x, in_weight_attrib2.x); + m += GET_BONE_MATRIX(bones.y, bones_a.y, bones_b.y, in_weight_attrib2.y); + m += GET_BONE_MATRIX(bones.z, bones_a.z, bones_b.z, in_weight_attrib2.z); + m += GET_BONE_MATRIX(bones.w, bones_a.w, bones_b.w, in_weight_attrib2.w); +#endif + + // Reverse order because its transposed. + out_vertex = (vec4(out_vertex, 1.0) * m).xyz; +#ifdef USE_NORMAL + vec3 vertex_normal = oct_to_vec3(out_normal); + out_normal = vec3_to_oct(normalize((vec4(vertex_normal, 0.0) * m).xyz)); +#endif // USE_NORMAL +#ifdef USE_TANGENT + vec4 vertex_tangent = oct_to_tang(out_tangent); + out_tangent = tang_to_oct(vec4(normalize((vec4(vertex_tangent.xyz, 0.0) * m).xyz), vertex_tangent.w)); +#endif // USE_TANGENT +#endif // USE_SKELETON +#endif // MODE_2D +} + +/* clang-format off */ +#[fragment] + +void main() { + +} +/* clang-format on */ diff --git a/drivers/gles3/shaders/stdlib_inc.glsl b/drivers/gles3/shaders/stdlib_inc.glsl index d5051760d7..d819940b1d 100644 --- a/drivers/gles3/shaders/stdlib_inc.glsl +++ b/drivers/gles3/shaders/stdlib_inc.glsl @@ -38,7 +38,6 @@ vec2 unpackSnorm2x16(uint p) { vec2 v = vec2(float(p & uint(0xffff)), float(p >> uint(16))); return clamp((v - 32767.0) * vec2(0.00003051851), vec2(-1.0), vec2(1.0)); } -#endif uint packUnorm4x8(vec4 v) { uvec4 uv = uvec4(round(clamp(v, vec4(0.0), vec4(1.0)) * 255.0)); @@ -58,3 +57,5 @@ vec4 unpackSnorm4x8(uint p) { vec4 v = vec4(float(p & uint(0xff)), float((p >> uint(8)) & uint(0xff)), float((p >> uint(16)) & uint(0xff)), float(p >> uint(24))); return clamp((v - vec4(127.0)) * vec4(0.00787401574), vec4(-1.0), vec4(1.0)); } + +#endif diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index a47df42500..285f32f1a5 100644 --- a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -44,10 +44,16 @@ MeshStorage *MeshStorage::get_singleton() { MeshStorage::MeshStorage() { singleton = this; + + { + skeleton_shader.shader.initialize(); + skeleton_shader.shader_version = skeleton_shader.shader.version_create(); + } } MeshStorage::~MeshStorage() { singleton = nullptr; + skeleton_shader.shader.version_free(skeleton_shader.shader_version); } /* MESH API */ @@ -88,10 +94,6 @@ void MeshStorage::mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count ERR_FAIL_COND(mesh->surface_count > 0); //surfaces already exist mesh->blend_shape_count = p_blend_shape_count; - - if (p_blend_shape_count > 0) { - WARN_PRINT_ONCE("blend shapes not supported by GLES3 renderer yet"); - } } bool MeshStorage::mesh_needs_instance(RID p_mesh, bool p_has_skeleton) { @@ -114,7 +116,6 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) uint32_t attrib_stride = 0; uint32_t skin_stride = 0; - // TODO: I think this should be <=, but it is copied from RendererRD, will have to verify later for (int i = 0; i < RS::ARRAY_WEIGHTS; i++) { if ((p_surface.format & (1 << i))) { switch (i) { @@ -248,8 +249,77 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) s->aabb = p_surface.aabb; s->bone_aabbs = p_surface.bone_aabbs; //only really useful for returning them. - if (mesh->blend_shape_count > 0) { - //s->blend_shape_buffer = RD::get_singleton()->storage_buffer_create(p_surface.blend_shape_data.size(), p_surface.blend_shape_data); + if (p_surface.skin_data.size() || mesh->blend_shape_count > 0) { + // Size must match the size of the vertex array. + int size = p_surface.vertex_data.size(); + int vertex_size = 0; + int stride = 0; + int normal_offset = 0; + int tangent_offset = 0; + if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { + if (p_surface.format & RS::ARRAY_FLAG_USE_2D_VERTICES) { + vertex_size = 2; + } else { + vertex_size = 3; + } + stride = sizeof(float) * vertex_size; + } + if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { + normal_offset = stride; + stride += sizeof(uint16_t) * 2; + } + if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { + tangent_offset = stride; + stride += sizeof(uint16_t) * 2; + } + + if (mesh->blend_shape_count > 0) { + // Blend shapes are passed as one large array, for OpenGL, we need to split each of them into their own buffer + s->blend_shapes = memnew_arr(Mesh::Surface::BlendShape, mesh->blend_shape_count); + + for (uint32_t i = 0; i < mesh->blend_shape_count; i++) { + glGenVertexArrays(1, &s->blend_shapes[i].vertex_array); + glBindVertexArray(s->blend_shapes[i].vertex_array); + glGenBuffers(1, &s->blend_shapes[i].vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, s->blend_shapes[i].vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, size, p_surface.blend_shape_data.ptr() + i * size, (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW); + + if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { + glEnableVertexAttribArray(RS::ARRAY_VERTEX + 3); + glVertexAttribPointer(RS::ARRAY_VERTEX + 3, vertex_size, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(0)); + } + if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { + glEnableVertexAttribArray(RS::ARRAY_NORMAL + 3); + glVertexAttribPointer(RS::ARRAY_NORMAL + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(normal_offset)); + } + if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { + glEnableVertexAttribArray(RS::ARRAY_TANGENT + 3); + glVertexAttribPointer(RS::ARRAY_TANGENT + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(tangent_offset)); + } + } + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + + // Create a vertex array to use for skeleton/blend shapes. + glGenVertexArrays(1, &s->skeleton_vertex_array); + glBindVertexArray(s->skeleton_vertex_array); + glBindBuffer(GL_ARRAY_BUFFER, s->vertex_buffer); + + if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glVertexAttribPointer(RS::ARRAY_VERTEX, vertex_size, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(0)); + } + if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { + glEnableVertexAttribArray(RS::ARRAY_NORMAL); + glVertexAttribPointer(RS::ARRAY_NORMAL, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(normal_offset)); + } + if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { + glEnableVertexAttribArray(RS::ARRAY_TANGENT); + glVertexAttribPointer(RS::ARRAY_TANGENT, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(tangent_offset)); + } + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); } if (mesh->surface_count == 0) { @@ -412,7 +482,13 @@ RS::SurfaceData MeshStorage::mesh_get_surface(RID p_mesh, int p_surface) const { } sd.bone_aabbs = s.bone_aabbs; - glBindBuffer(GL_ARRAY_BUFFER, 0); + + if (mesh->blend_shape_count) { + sd.blend_shape_data = Vector<uint8_t>(); + for (uint32_t i = 0; i < mesh->blend_shape_count; i++) { + sd.blend_shape_data.append_array(Utilities::buffer_get_data(GL_ARRAY_BUFFER, s.blend_shapes[i].vertex_buffer, s.vertex_buffer_size)); + } + } return sd; } @@ -608,6 +684,24 @@ void MeshStorage::mesh_clear(RID p_mesh) { memdelete_arr(s.lods); } + if (mesh->blend_shape_count) { + for (uint32_t j = 0; j < mesh->blend_shape_count; j++) { + if (s.blend_shapes[j].vertex_buffer != 0) { + glDeleteBuffers(1, &s.blend_shapes[j].vertex_buffer); + s.blend_shapes[j].vertex_buffer = 0; + } + if (s.blend_shapes[j].vertex_array != 0) { + glDeleteVertexArrays(1, &s.blend_shapes[j].vertex_array); + s.blend_shapes[j].vertex_array = 0; + } + } + memdelete_arr(s.blend_shapes); + } + if (s.skeleton_vertex_array != 0) { + glDeleteVertexArrays(1, &s.skeleton_vertex_array); + s.skeleton_vertex_array = 0; + } + memdelete(mesh->surfaces[i]); } if (mesh->surfaces) { @@ -663,15 +757,15 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V case RS::ARRAY_NORMAL: { attribs[i].offset = vertex_stride; attribs[i].size = 2; - attribs[i].type = GL_UNSIGNED_SHORT; - vertex_stride += sizeof(uint16_t) * 2; + attribs[i].type = (mis ? GL_FLOAT : GL_UNSIGNED_SHORT); + vertex_stride += sizeof(uint16_t) * 2 * (mis ? 2 : 1); attribs[i].normalized = GL_TRUE; } break; case RS::ARRAY_TANGENT: { attribs[i].offset = vertex_stride; attribs[i].size = 2; - attribs[i].type = GL_UNSIGNED_SHORT; - vertex_stride += sizeof(uint16_t) * 2; + attribs[i].type = (mis ? GL_FLOAT : GL_UNSIGNED_SHORT); + vertex_stride += sizeof(uint16_t) * 2 * (mis ? 2 : 1); attribs[i].normalized = GL_TRUE; } break; case RS::ARRAY_COLOR: { @@ -716,7 +810,7 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V attribs[i].offset = skin_stride; attribs[i].size = 4; attribs[i].type = GL_UNSIGNED_SHORT; - attributes_stride += 4 * sizeof(uint16_t); + skin_stride += 4 * sizeof(uint16_t); attribs[i].normalized = GL_FALSE; attribs[i].integer = true; } break; @@ -724,7 +818,7 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V attribs[i].offset = skin_stride; attribs[i].size = 4; attribs[i].type = GL_UNSIGNED_SHORT; - attributes_stride += 4 * sizeof(uint16_t); + skin_stride += 4 * sizeof(uint16_t); attribs[i].normalized = GL_TRUE; } break; } @@ -815,7 +909,7 @@ void MeshStorage::mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int ERR_FAIL_COND(!mi); ERR_FAIL_INDEX(p_shape, (int)mi->blend_weights.size()); mi->blend_weights[p_shape] = p_weight; - mi->weights_dirty = true; + mi->dirty = true; } void MeshStorage::_mesh_instance_clear(MeshInstance *mi) { @@ -827,38 +921,65 @@ void MeshStorage::_mesh_instance_clear(MeshInstance *mi) { } memfree(mi->surfaces[i].versions); } + + if (mi->surfaces[i].vertex_buffers[0] != 0) { + glDeleteBuffers(2, mi->surfaces[i].vertex_buffers); + mi->surfaces[i].vertex_buffers[0] = 0; + mi->surfaces[i].vertex_buffers[1] = 0; + } + if (mi->surfaces[i].vertex_buffer != 0) { glDeleteBuffers(1, &mi->surfaces[i].vertex_buffer); mi->surfaces[i].vertex_buffer = 0; } } mi->surfaces.clear(); - - if (mi->blend_weights_buffer != 0) { - glDeleteBuffers(1, &mi->blend_weights_buffer); - mi->blend_weights_buffer = 0; - } mi->blend_weights.clear(); - mi->weights_dirty = false; mi->skeleton_version = 0; } void MeshStorage::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface) { - if (mesh->blend_shape_count > 0 && mi->blend_weights_buffer == 0) { + if (mesh->blend_shape_count > 0) { mi->blend_weights.resize(mesh->blend_shape_count); for (uint32_t i = 0; i < mi->blend_weights.size(); i++) { - mi->blend_weights[i] = 0; + mi->blend_weights[i] = 0.0; } - // Todo allocate buffer for blend_weights and copy data to it - //mi->blend_weights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * mi->blend_weights.size(), mi->blend_weights.to_byte_array()); - - mi->weights_dirty = true; } MeshInstance::Surface s; - if (mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) { - //surface warrants transform - //s.vertex_buffer = RD::get_singleton()->vertex_buffer_create(mesh->surfaces[p_surface]->vertex_buffer_size, Vector<uint8_t>(), true); + if ((mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) && mesh->surfaces[p_surface]->vertex_buffer_size > 0) { + // Cache surface properties + s.format_cache = mesh->surfaces[p_surface]->format; + if ((s.format_cache & (1 << RS::ARRAY_VERTEX))) { + if (s.format_cache & RS::ARRAY_FLAG_USE_2D_VERTICES) { + s.vertex_size_cache = 2; + } else { + s.vertex_size_cache = 3; + } + s.vertex_stride_cache = sizeof(float) * s.vertex_size_cache; + } + if ((s.format_cache & (1 << RS::ARRAY_NORMAL))) { + s.vertex_normal_offset_cache = s.vertex_stride_cache; + s.vertex_stride_cache += sizeof(uint32_t) * 2; + } + if ((s.format_cache & (1 << RS::ARRAY_TANGENT))) { + s.vertex_tangent_offset_cache = s.vertex_stride_cache; + s.vertex_stride_cache += sizeof(uint32_t) * 2; + } + + // Buffer to be used for rendering. Final output of skeleton and blend shapes. + glGenBuffers(1, &s.vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count, nullptr, GL_DYNAMIC_DRAW); + if (mesh->blend_shape_count > 0) { + // Ping-Pong buffers for processing blendshapes. + glGenBuffers(2, s.vertex_buffers); + for (uint32_t i = 0; i < 2; i++) { + glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffers[i]); + glBufferData(GL_ARRAY_BUFFER, s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count, nullptr, GL_DYNAMIC_DRAW); + } + } + glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind } mi->surfaces.push_back(s); @@ -870,11 +991,6 @@ void MeshStorage::mesh_instance_check_for_update(RID p_mesh_instance) { bool needs_update = mi->dirty; - if (mi->weights_dirty && !mi->weight_update_list.in_list()) { - dirty_mesh_instance_weights.add(&mi->weight_update_list); - needs_update = true; - } - if (mi->array_update_list.in_list()) { return; } @@ -891,22 +1007,223 @@ void MeshStorage::mesh_instance_check_for_update(RID p_mesh_instance) { } } -void MeshStorage::update_mesh_instances() { - while (dirty_mesh_instance_weights.first()) { - MeshInstance *mi = dirty_mesh_instance_weights.first()->self(); +void MeshStorage::_blend_shape_bind_mesh_instance_buffer(MeshInstance *p_mi, uint32_t p_surface) { + glBindBuffer(GL_ARRAY_BUFFER, p_mi->surfaces[p_surface].vertex_buffers[0]); - if (mi->blend_weights_buffer != 0) { - //RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr()); - } - dirty_mesh_instance_weights.remove(&mi->weight_update_list); - mi->weights_dirty = false; + if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_VERTEX))) { + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glVertexAttribPointer(RS::ARRAY_VERTEX, p_mi->surfaces[p_surface].vertex_size_cache, GL_FLOAT, GL_FALSE, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(0)); + } else { + glDisableVertexAttribArray(RS::ARRAY_VERTEX); } + if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_NORMAL))) { + glEnableVertexAttribArray(RS::ARRAY_NORMAL); + glVertexAttribIPointer(RS::ARRAY_NORMAL, 2, GL_UNSIGNED_INT, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(p_mi->surfaces[p_surface].vertex_normal_offset_cache)); + } else { + glDisableVertexAttribArray(RS::ARRAY_NORMAL); + } + if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_TANGENT))) { + glEnableVertexAttribArray(RS::ARRAY_TANGENT); + glVertexAttribIPointer(RS::ARRAY_TANGENT, 2, GL_UNSIGNED_INT, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(p_mi->surfaces[p_surface].vertex_tangent_offset_cache)); + } else { + glDisableVertexAttribArray(RS::ARRAY_TANGENT); + } +} + +void MeshStorage::_compute_skeleton(MeshInstance *p_mi, Skeleton *p_sk, uint32_t p_surface) { + glBindBuffer(GL_ARRAY_BUFFER, 0); + + // Add in the bones and weights. + glBindBuffer(GL_ARRAY_BUFFER, p_mi->mesh->surfaces[p_surface]->skin_buffer); + + bool use_8_weights = p_mi->surfaces[p_surface].format_cache & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS; + int skin_stride = sizeof(int16_t) * (use_8_weights ? 16 : 8); + glEnableVertexAttribArray(RS::ARRAY_BONES); + glVertexAttribIPointer(RS::ARRAY_BONES, 4, GL_UNSIGNED_SHORT, skin_stride, CAST_INT_TO_UCHAR_PTR(0)); + if (use_8_weights) { + glEnableVertexAttribArray(11); + glVertexAttribIPointer(11, 4, GL_UNSIGNED_SHORT, skin_stride, CAST_INT_TO_UCHAR_PTR(4 * sizeof(uint16_t))); + glEnableVertexAttribArray(12); + glVertexAttribPointer(12, 4, GL_UNSIGNED_SHORT, GL_TRUE, skin_stride, CAST_INT_TO_UCHAR_PTR(8 * sizeof(uint16_t))); + glEnableVertexAttribArray(13); + glVertexAttribPointer(13, 4, GL_UNSIGNED_SHORT, GL_TRUE, skin_stride, CAST_INT_TO_UCHAR_PTR(12 * sizeof(uint16_t))); + } else { + glEnableVertexAttribArray(RS::ARRAY_WEIGHTS); + glVertexAttribPointer(RS::ARRAY_WEIGHTS, 4, GL_UNSIGNED_SHORT, GL_TRUE, skin_stride, CAST_INT_TO_UCHAR_PTR(4 * sizeof(uint16_t))); + } + + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, p_mi->surfaces[p_surface].vertex_buffer); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, p_sk->transforms_texture); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, p_mi->mesh->surfaces[p_surface]->vertex_count); + glEndTransformFeedback(); + + glDisableVertexAttribArray(RS::ARRAY_BONES); + glDisableVertexAttribArray(RS::ARRAY_WEIGHTS); + glDisableVertexAttribArray(RS::ARRAY_BONES + 2); + glDisableVertexAttribArray(RS::ARRAY_WEIGHTS + 2); + glBindVertexArray(0); + glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); +} + +void MeshStorage::update_mesh_instances() { if (dirty_mesh_instance_arrays.first() == nullptr) { return; //nothing to do } + glEnable(GL_RASTERIZER_DISCARD); // Process skeletons and blend shapes using transform feedback - // TODO: Implement when working on skeletons and blend shapes + while (dirty_mesh_instance_arrays.first()) { + MeshInstance *mi = dirty_mesh_instance_arrays.first()->self(); + + Skeleton *sk = skeleton_owner.get_or_null(mi->skeleton); + + // Precompute base weight if using blend shapes. + float base_weight = 1.0; + if (mi->mesh->blend_shape_count && mi->mesh->blend_shape_mode == RS::BLEND_SHAPE_MODE_NORMALIZED) { + for (uint32_t i = 0; i < mi->mesh->blend_shape_count; i++) { + base_weight -= mi->blend_weights[i]; + } + } + + for (uint32_t i = 0; i < mi->surfaces.size(); i++) { + if (mi->surfaces[i].vertex_buffer == 0 || mi->mesh->surfaces[i]->skeleton_vertex_array == 0) { + continue; + } + + bool array_is_2d = mi->surfaces[i].format_cache & RS::ARRAY_FLAG_USE_2D_VERTICES; + bool can_use_skeleton = sk != nullptr && sk->use_2d == array_is_2d && (mi->surfaces[i].format_cache & RS::ARRAY_FORMAT_BONES); + bool use_8_weights = mi->surfaces[i].format_cache & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS; + + // Always process blend shapes first. + if (mi->mesh->blend_shape_count) { + SkeletonShaderGLES3::ShaderVariant variant = SkeletonShaderGLES3::MODE_BASE_PASS; + uint64_t specialization = 0; + specialization |= array_is_2d ? SkeletonShaderGLES3::MODE_2D : 0; + specialization |= SkeletonShaderGLES3::USE_BLEND_SHAPES; + if (!array_is_2d) { + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_NORMAL))) { + specialization |= SkeletonShaderGLES3::USE_NORMAL; + } + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_TANGENT))) { + specialization |= SkeletonShaderGLES3::USE_TANGENT; + } + } + + bool success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_WEIGHT, base_weight, skeleton_shader.shader_version, variant, specialization); + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(mi->mesh->surfaces[i]->skeleton_vertex_array); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[0]); + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); + glEndTransformFeedback(); + + variant = SkeletonShaderGLES3::MODE_BLEND_PASS; + success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + //Do the last blend shape separately, as it can be combined with the skeleton pass. + for (uint32_t bs = 0; bs < mi->mesh->blend_shape_count - 1; bs++) { + float weight = mi->blend_weights[bs]; + + if (Math::is_zero_approx(weight)) { + //not bother with this one + continue; + } + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_WEIGHT, weight, skeleton_shader.shader_version, variant, specialization); + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); + + glBindVertexArray(mi->mesh->surfaces[i]->blend_shapes[bs].vertex_array); + _blend_shape_bind_mesh_instance_buffer(mi, i); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[1]); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); + glEndTransformFeedback(); + + SWAP(mi->surfaces[i].vertex_buffers[0], mi->surfaces[i].vertex_buffers[1]); + } + uint32_t bs = mi->mesh->blend_shape_count - 1; + + float weight = mi->blend_weights[bs]; + + glBindVertexArray(mi->mesh->surfaces[i]->blend_shapes[bs].vertex_array); + _blend_shape_bind_mesh_instance_buffer(mi, i); + + specialization |= can_use_skeleton ? SkeletonShaderGLES3::USE_SKELETON : 0; + specialization |= (can_use_skeleton && use_8_weights) ? SkeletonShaderGLES3::USE_EIGHT_WEIGHTS : 0; + specialization |= SkeletonShaderGLES3::FINAL_PASS; + success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_WEIGHT, weight, skeleton_shader.shader_version, variant, specialization); + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); + + if (can_use_skeleton) { + // Do last blendshape in the same pass as the Skeleton. + _compute_skeleton(mi, sk, i); + can_use_skeleton = false; + } else { + // Do last blendshape by itself and prepare vertex data for use by the renderer. + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffer); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); + glEndTransformFeedback(); + } + + glBindVertexArray(0); + glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); + } + + // This branch should only execute when Skeleton is run by itself. + if (can_use_skeleton) { + SkeletonShaderGLES3::ShaderVariant variant = SkeletonShaderGLES3::MODE_BASE_PASS; + uint64_t specialization = 0; + specialization |= array_is_2d ? SkeletonShaderGLES3::MODE_2D : 0; + specialization |= SkeletonShaderGLES3::USE_SKELETON; + specialization |= SkeletonShaderGLES3::FINAL_PASS; + specialization |= use_8_weights ? SkeletonShaderGLES3::USE_EIGHT_WEIGHTS : 0; + if (!array_is_2d) { + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_NORMAL))) { + specialization |= SkeletonShaderGLES3::USE_NORMAL; + } + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_TANGENT))) { + specialization |= SkeletonShaderGLES3::USE_TANGENT; + } + } + + bool success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + glBindVertexArray(mi->mesh->surfaces[i]->skeleton_vertex_array); + _compute_skeleton(mi, sk, i); + } + } + mi->dirty = false; + if (sk) { + mi->skeleton_version = sk->version; + } + dirty_mesh_instance_arrays.remove(&mi->array_update_list); + } + glDisable(GL_RASTERIZER_DISCARD); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0); } /* MULTIMESH API */ @@ -1577,45 +1894,207 @@ void MeshStorage::_update_dirty_multimeshes() { /* SKELETON API */ RID MeshStorage::skeleton_allocate() { - return RID(); + return skeleton_owner.allocate_rid(); } void MeshStorage::skeleton_initialize(RID p_rid) { + skeleton_owner.initialize_rid(p_rid, Skeleton()); } void MeshStorage::skeleton_free(RID p_rid) { + _update_dirty_skeletons(); + skeleton_allocate_data(p_rid, 0); + Skeleton *skeleton = skeleton_owner.get_or_null(p_rid); + skeleton->dependency.deleted_notify(p_rid); + skeleton_owner.free(p_rid); +} + +void MeshStorage::_skeleton_make_dirty(Skeleton *skeleton) { + if (!skeleton->dirty) { + skeleton->dirty = true; + skeleton->dirty_list = skeleton_dirty_list; + skeleton_dirty_list = skeleton; + } } void MeshStorage::skeleton_allocate_data(RID p_skeleton, int p_bones, bool p_2d_skeleton) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + ERR_FAIL_COND(!skeleton); + ERR_FAIL_COND(p_bones < 0); + + if (skeleton->size == p_bones && skeleton->use_2d == p_2d_skeleton) { + return; + } + + skeleton->size = p_bones; + skeleton->use_2d = p_2d_skeleton; + skeleton->height = (p_bones * (p_2d_skeleton ? 2 : 3)) / 256; + if ((p_bones * (p_2d_skeleton ? 2 : 3)) % 256) { + skeleton->height++; + } + + if (skeleton->transforms_texture != 0) { + glDeleteTextures(1, &skeleton->transforms_texture); + skeleton->transforms_texture = 0; + skeleton->data.clear(); + } + + if (skeleton->size) { + skeleton->data.resize(256 * skeleton->height * 4); + glGenTextures(1, &skeleton->transforms_texture); + glBindTexture(GL_TEXTURE_2D, skeleton->transforms_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 256, skeleton->height, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glBindTexture(GL_TEXTURE_2D, 0); + + memset(skeleton->data.ptrw(), 0, skeleton->data.size() * sizeof(float)); + + _skeleton_make_dirty(skeleton); + } + + skeleton->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_SKELETON_DATA); } void MeshStorage::skeleton_set_base_transform_2d(RID p_skeleton, const Transform2D &p_base_transform) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_NULL(skeleton); + ERR_FAIL_COND(!skeleton->use_2d); + + skeleton->base_transform_2d = p_base_transform; } int MeshStorage::skeleton_get_bone_count(RID p_skeleton) const { - return 0; + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + ERR_FAIL_COND_V(!skeleton, 0); + + return skeleton->size; } void MeshStorage::skeleton_bone_set_transform(RID p_skeleton, int p_bone, const Transform3D &p_transform) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND(!skeleton); + ERR_FAIL_INDEX(p_bone, skeleton->size); + ERR_FAIL_COND(skeleton->use_2d); + + float *dataptr = skeleton->data.ptrw() + p_bone * 12; + + dataptr[0] = p_transform.basis.rows[0][0]; + dataptr[1] = p_transform.basis.rows[0][1]; + dataptr[2] = p_transform.basis.rows[0][2]; + dataptr[3] = p_transform.origin.x; + dataptr[4] = p_transform.basis.rows[1][0]; + dataptr[5] = p_transform.basis.rows[1][1]; + dataptr[6] = p_transform.basis.rows[1][2]; + dataptr[7] = p_transform.origin.y; + dataptr[8] = p_transform.basis.rows[2][0]; + dataptr[9] = p_transform.basis.rows[2][1]; + dataptr[10] = p_transform.basis.rows[2][2]; + dataptr[11] = p_transform.origin.z; + + _skeleton_make_dirty(skeleton); } Transform3D MeshStorage::skeleton_bone_get_transform(RID p_skeleton, int p_bone) const { - return Transform3D(); + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND_V(!skeleton, Transform3D()); + ERR_FAIL_INDEX_V(p_bone, skeleton->size, Transform3D()); + ERR_FAIL_COND_V(skeleton->use_2d, Transform3D()); + + const float *dataptr = skeleton->data.ptr() + p_bone * 12; + + Transform3D t; + + t.basis.rows[0][0] = dataptr[0]; + t.basis.rows[0][1] = dataptr[1]; + t.basis.rows[0][2] = dataptr[2]; + t.origin.x = dataptr[3]; + t.basis.rows[1][0] = dataptr[4]; + t.basis.rows[1][1] = dataptr[5]; + t.basis.rows[1][2] = dataptr[6]; + t.origin.y = dataptr[7]; + t.basis.rows[2][0] = dataptr[8]; + t.basis.rows[2][1] = dataptr[9]; + t.basis.rows[2][2] = dataptr[10]; + t.origin.z = dataptr[11]; + + return t; } void MeshStorage::skeleton_bone_set_transform_2d(RID p_skeleton, int p_bone, const Transform2D &p_transform) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND(!skeleton); + ERR_FAIL_INDEX(p_bone, skeleton->size); + ERR_FAIL_COND(!skeleton->use_2d); + + float *dataptr = skeleton->data.ptrw() + p_bone * 8; + + dataptr[0] = p_transform.columns[0][0]; + dataptr[1] = p_transform.columns[1][0]; + dataptr[2] = 0; + dataptr[3] = p_transform.columns[2][0]; + dataptr[4] = p_transform.columns[0][1]; + dataptr[5] = p_transform.columns[1][1]; + dataptr[6] = 0; + dataptr[7] = p_transform.columns[2][1]; + + _skeleton_make_dirty(skeleton); } Transform2D MeshStorage::skeleton_bone_get_transform_2d(RID p_skeleton, int p_bone) const { - return Transform2D(); + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND_V(!skeleton, Transform2D()); + ERR_FAIL_INDEX_V(p_bone, skeleton->size, Transform2D()); + ERR_FAIL_COND_V(!skeleton->use_2d, Transform2D()); + + const float *dataptr = skeleton->data.ptr() + p_bone * 8; + + Transform2D t; + t.columns[0][0] = dataptr[0]; + t.columns[1][0] = dataptr[1]; + t.columns[2][0] = dataptr[3]; + t.columns[0][1] = dataptr[4]; + t.columns[1][1] = dataptr[5]; + t.columns[2][1] = dataptr[7]; + + return t; } -void MeshStorage::skeleton_update_dependency(RID p_base, DependencyTracker *p_instance) { +void MeshStorage::_update_dirty_skeletons() { + while (skeleton_dirty_list) { + Skeleton *skeleton = skeleton_dirty_list; + + if (skeleton->size) { + glBindTexture(GL_TEXTURE_2D, skeleton->transforms_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 256, skeleton->height, 0, GL_RGBA, GL_FLOAT, skeleton->data.ptr()); + glBindTexture(GL_TEXTURE_2D, 0); + } + + skeleton_dirty_list = skeleton->dirty_list; + + skeleton->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_SKELETON_BONES); + + skeleton->version++; + + skeleton->dirty = false; + skeleton->dirty_list = nullptr; + } + + skeleton_dirty_list = nullptr; } -/* OCCLUDER */ +void MeshStorage::skeleton_update_dependency(RID p_skeleton, DependencyTracker *p_instance) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + ERR_FAIL_COND(!skeleton); -void MeshStorage::occluder_set_mesh(RID p_occluder, const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices) { + p_instance->update_dependency(&skeleton->dependency); } #endif // GLES3_ENABLED diff --git a/drivers/gles3/storage/mesh_storage.h b/drivers/gles3/storage/mesh_storage.h index 1aef3cbf78..0f30814928 100644 --- a/drivers/gles3/storage/mesh_storage.h +++ b/drivers/gles3/storage/mesh_storage.h @@ -33,6 +33,7 @@ #ifdef GLES3_ENABLED +#include "../shaders/skeleton.glsl.gen.h" #include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" #include "core/templates/self_list.h" @@ -102,7 +103,13 @@ struct Mesh { Vector<AABB> bone_aabbs; - GLuint blend_shape_buffer = 0; + struct BlendShape { + GLuint vertex_buffer = 0; + GLuint vertex_array = 0; + }; + + BlendShape *blend_shapes = nullptr; + GLuint skeleton_vertex_array = 0; RID material; }; @@ -136,7 +143,14 @@ struct MeshInstance { Mesh *mesh = nullptr; RID skeleton; struct Surface { + GLuint vertex_buffers[2] = { 0, 0 }; + GLuint vertex_arrays[2] = { 0, 0 }; GLuint vertex_buffer = 0; + int vertex_stride_cache = 0; + int vertex_size_cache = 0; + int vertex_normal_offset_cache = 0; + int vertex_tangent_offset_cache = 0; + uint32_t format_cache = 0; Mesh::Surface::Version *versions = nullptr; //allocated on demand uint32_t version_count = 0; @@ -144,7 +158,6 @@ struct MeshInstance { LocalVector<Surface> surfaces; LocalVector<float> blend_weights; - GLuint blend_weights_buffer = 0; List<MeshInstance *>::Element *I = nullptr; //used to erase itself uint64_t skeleton_version = 0; bool dirty = false; @@ -186,13 +199,15 @@ struct MultiMesh { struct Skeleton { bool use_2d = false; int size = 0; + int height = 0; Vector<float> data; - GLuint buffer = 0; bool dirty = false; Skeleton *dirty_list = nullptr; Transform2D base_transform_2d; + GLuint transforms_texture = 0; + uint64_t version = 1; Dependency dependency; @@ -202,6 +217,11 @@ class MeshStorage : public RendererMeshStorage { private: static MeshStorage *singleton; + struct { + SkeletonShaderGLES3 shader; + RID shader_version; + } skeleton_shader; + /* Mesh */ mutable RID_Owner<Mesh, true> mesh_owner; @@ -214,6 +234,7 @@ private: void _mesh_instance_clear(MeshInstance *mi); void _mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface); + void _blend_shape_bind_mesh_instance_buffer(MeshInstance *p_mi, uint32_t p_surface); SelfList<MeshInstance>::List dirty_mesh_instance_weights; SelfList<MeshInstance>::List dirty_mesh_instance_arrays; @@ -232,9 +253,10 @@ private: mutable RID_Owner<Skeleton, true> skeleton_owner; - Skeleton *skeleton_dirty_list = nullptr; - _FORCE_INLINE_ void _skeleton_make_dirty(Skeleton *skeleton); + void _compute_skeleton(MeshInstance *p_mi, Skeleton *p_sk, uint32_t p_surface); + + Skeleton *skeleton_dirty_list = nullptr; public: static MeshStorage *get_singleton(); @@ -534,9 +556,11 @@ public: virtual void skeleton_update_dependency(RID p_base, DependencyTracker *p_instance) override; - /* OCCLUDER */ + void _update_dirty_skeletons(); - void occluder_set_mesh(RID p_occluder, const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices); + _FORCE_INLINE_ bool skeleton_is_valid(RID p_skeleton) { + return skeleton_owner.get_or_null(p_skeleton) != nullptr; + } }; } // namespace GLES3 diff --git a/drivers/gles3/storage/utilities.cpp b/drivers/gles3/storage/utilities.cpp index 393093c2a7..fe900c7cfb 100644 --- a/drivers/gles3/storage/utilities.cpp +++ b/drivers/gles3/storage/utilities.cpp @@ -281,7 +281,7 @@ String Utilities::get_captured_timestamp_name(uint32_t p_index) const { void Utilities::update_dirty_resources() { MaterialStorage::get_singleton()->_update_global_shader_uniforms(); MaterialStorage::get_singleton()->_update_queued_materials(); - //MeshStorage::get_singleton()->_update_dirty_skeletons(); + MeshStorage::get_singleton()->_update_dirty_skeletons(); MeshStorage::get_singleton()->_update_dirty_multimeshes(); TextureStorage::get_singleton()->update_texture_atlas(); } diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index 161706489f..b02a100784 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -565,7 +565,7 @@ String OS_Unix::get_executable_path() const { WARN_PRINT("MAXPATHLEN is too small"); } - String path(resolved_path); + String path = String::utf8(resolved_path); delete[] resolved_path; return path; diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 6e61006395..7f5bac30f1 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -47,7 +47,7 @@ static const uint32_t SMALL_ALLOCATION_MAX_SIZE = 4096; // Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values). -RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, uint32_t p_post_barrier) { +RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, BitField<BarrierMask> p_post_barrier) { Buffer *buffer = nullptr; if (vertex_buffer_owner.owns(p_buffer)) { buffer = vertex_buffer_owner.get_or_null(p_buffer); @@ -55,11 +55,11 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; r_access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; if (buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; } @@ -69,20 +69,20 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID r_access_mask |= VK_ACCESS_INDEX_READ_BIT; buffer = index_buffer_owner.get_or_null(p_buffer); } else if (uniform_buffer_owner.owns(p_buffer)) { - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; } r_access_mask |= VK_ACCESS_UNIFORM_READ_BIT; buffer = uniform_buffer_owner.get_or_null(p_buffer); } else if (texture_buffer_owner.owns(p_buffer)) { - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT; } @@ -90,11 +90,11 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID buffer = &texture_buffer_owner.get_or_null(p_buffer)->buffer; } else if (storage_buffer_owner.owns(p_buffer)) { buffer = storage_buffer_owner.get_or_null(p_buffer); - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } @@ -2009,7 +2009,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T if (p_data.size()) { for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) { - _texture_update(id, i, p_data[i], RD::BARRIER_MASK_ALL, true); + _texture_update(id, i, p_data[i], RD::BARRIER_MASK_ALL_BARRIERS, true); } } return id; @@ -2414,7 +2414,7 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p return id; } -Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier) { return _texture_update(p_texture, p_layer, p_data, p_post_barrier, false); } @@ -2434,7 +2434,7 @@ static _ALWAYS_INLINE_ void _copy_region(uint8_t const *__restrict p_src, uint8_ } } -Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier, bool p_use_setup_queue) { +Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier, bool p_use_setup_queue) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG((draw_list || compute_list) && !p_use_setup_queue, ERR_INVALID_PARAMETER, @@ -2608,15 +2608,15 @@ Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, co { uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -2869,7 +2869,7 @@ Size2i RenderingDeviceVulkan::texture_size(RID p_texture) { return Size2i(tex->width, tex->height); } -Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_from_texture); @@ -2994,15 +2994,15 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -3064,7 +3064,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, return OK; } -Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_from_texture); @@ -3172,15 +3172,15 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -3235,7 +3235,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID return OK; } -Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_texture); @@ -3308,15 +3308,15 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -3355,7 +3355,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, return OK; } -bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const { +bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_format, BitField<RenderingDevice::TextureUsageBits> p_usage) const { ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, false); _THREAD_SAFE_METHOD_ @@ -3365,34 +3365,34 @@ bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_f vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), vulkan_formats[p_format], &properties); VkFormatFeatureFlags flags; - if (p_usage & TEXTURE_USAGE_CPU_READ_BIT) { + if (p_usage.has_flag(TEXTURE_USAGE_CPU_READ_BIT)) { flags = properties.linearTilingFeatures; } else { flags = properties.optimalTilingFeatures; } - if (p_usage & TEXTURE_USAGE_SAMPLING_BIT && !(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_SAMPLING_BIT) && !(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) && !(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && !(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_STORAGE_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_STORAGE_BIT) && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_STORAGE_ATOMIC_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT) && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) { return false; } // Validation via VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR fails if VRS attachment is not supported. - if (p_usage & TEXTURE_USAGE_VRS_ATTACHMENT_BIT && p_format != DATA_FORMAT_R8_UINT) { + if (p_usage.has_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && p_format != DATA_FORMAT_R8_UINT) { return false; } @@ -4378,7 +4378,7 @@ RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(cons return id; } -RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers) { +RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers, const Vector<uint64_t> &p_offsets) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID()); @@ -4392,6 +4392,13 @@ RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFo VertexArray vertex_array; + if (p_offsets.is_empty()) { + vertex_array.offsets.resize_zeroed(p_src_buffers.size()); + } else { + ERR_FAIL_COND_V(p_offsets.size() != p_src_buffers.size(), RID()); + vertex_array.offsets = p_offsets; + } + vertex_array.vertex_count = p_vertex_count; vertex_array.description = p_vertex_format; vertex_array.max_instances_allowed = 0xFFFFFFFF; // By default as many as you want. @@ -4423,7 +4430,6 @@ RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFo } vertex_array.buffers.push_back(buffer->buffer); - vertex_array.offsets.push_back(0); // Offset unused, but passing anyway. } RID id = vertex_array_owner.make_rid(vertex_array); @@ -6315,7 +6321,7 @@ void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_ us->invalidated_callback_userdata = p_userdata; } -Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, @@ -6325,7 +6331,7 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint VkPipelineStageFlags dst_stage_mask = 0; VkAccessFlags dst_access = 0; - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { // Protect subsequent updates. dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -6361,7 +6367,7 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint return err; } -Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER, @@ -6373,7 +6379,7 @@ Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint3 VkPipelineStageFlags dst_stage_mask = 0; VkAccessFlags dst_access = 0; - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { // Protect subsequent updates. dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -6412,7 +6418,7 @@ Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { VkPipelineShaderStageCreateFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; // Get the vulkan buffer and the potential stage/access possible. - Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL); + Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL_BARRIERS); if (!buffer) { ERR_FAIL_V_MSG(Vector<uint8_t>(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving."); } @@ -8084,7 +8090,7 @@ void RenderingDeviceVulkan::_draw_list_free(Rect2i *r_last_viewport) { _THREAD_SAFE_UNLOCK_ } -void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { +void RenderingDeviceVulkan::draw_list_end(BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive."); @@ -8106,15 +8112,15 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } @@ -8592,20 +8598,20 @@ void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { #endif } -void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { +void RenderingDeviceVulkan::compute_list_end(BitField<BarrierMask> p_post_barrier) { ERR_FAIL_COND(!compute_list); uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } @@ -8673,43 +8679,45 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { _THREAD_SAFE_UNLOCK_ } -void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { +void RenderingDeviceVulkan::barrier(BitField<BarrierMask> p_from, BitField<BarrierMask> p_to) { uint32_t src_barrier_flags = 0; uint32_t src_access_flags = 0; - if (p_from & BARRIER_MASK_COMPUTE) { - src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT; - } - if (p_from & BARRIER_MASK_RASTER) { - src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - if (p_from & BARRIER_MASK_TRANSFER) { - src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; - src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; - } if (p_from == 0) { src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } else { + if (p_from.has_flag(BARRIER_MASK_COMPUTE)) { + src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_from.has_flag(BARRIER_MASK_RASTER)) { + src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + if (p_from.has_flag(BARRIER_MASK_TRANSFER)) { + src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } } uint32_t dst_barrier_flags = 0; uint32_t dst_access_flags = 0; - if (p_to & BARRIER_MASK_COMPUTE) { - dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - } - if (p_to & BARRIER_MASK_RASTER) { - dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; - dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; - } - if (p_to & BARRIER_MASK_TRANSFER) { - dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; - dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; - } if (p_to == 0) { dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } else { + if (p_to.has_flag(BARRIER_MASK_COMPUTE)) { + dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_to.has_flag(BARRIER_MASK_RASTER)) { + dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_to.has_flag(BARRIER_MASK_TRANSFER)) { + dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } } _memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true); diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 7c75e9bb2e..c6e1830e90 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -162,7 +162,7 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t texture_upload_region_size_px = 0; Vector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d = false); - Error _texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier, bool p_use_setup_queue); + Error _texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier, bool p_use_setup_queue); /*****************/ /**** SAMPLER ****/ @@ -909,7 +909,7 @@ class RenderingDeviceVulkan : public RenderingDevice { Error _draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass, uint32_t *r_subpass_count); Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector<RID> &p_storage_textures); _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); - Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access, uint32_t p_post_barrier); + Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access, BitField<BarrierMask> p_post_barrier); Error _draw_list_allocate(const Rect2i &p_viewport, uint32_t p_splits, uint32_t p_subpass); void _draw_list_free(Rect2i *r_last_viewport = nullptr); @@ -1052,17 +1052,17 @@ public: virtual RID texture_create_from_extension(TextureType p_type, DataFormat p_format, TextureSamples p_samples, uint64_t p_flags, uint64_t p_image, uint64_t p_width, uint64_t p_height, uint64_t p_depth, uint64_t p_layers); virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, uint32_t p_mipmaps = 1, TextureSliceType p_slice_type = TEXTURE_SLICE_2D); - virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Vector<uint8_t> texture_get_data(RID p_texture, uint32_t p_layer); - virtual bool texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const; + virtual bool texture_is_format_supported_for_usage(DataFormat p_format, BitField<RenderingDevice::TextureUsageBits> p_usage) const; virtual bool texture_is_shared(RID p_texture); virtual bool texture_is_valid(RID p_texture); virtual Size2i texture_size(RID p_texture); - virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier = BARRIER_MASK_ALL); - virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier = BARRIER_MASK_ALL); - virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); /*********************/ /**** FRAMEBUFFER ****/ @@ -1095,7 +1095,7 @@ public: // Internally reference counted, this ID is warranted to be unique for the same description, but needs to be freed as many times as it was allocated. virtual VertexFormatID vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats); - virtual RID vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers); + virtual RID vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers, const Vector<uint64_t> &p_offsets = Vector<uint64_t>()); virtual RID index_buffer_create(uint32_t p_size_indices, IndexBufferFormat p_format, const Vector<uint8_t> &p_data = Vector<uint8_t>(), bool p_use_restart_indices = false); @@ -1124,8 +1124,8 @@ public: virtual bool uniform_set_is_valid(RID p_uniform_set); virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata); - virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); // Works for any buffer. - virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); // Works for any buffer. + virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Vector<uint8_t> buffer_get_data(RID p_buffer); /*************************/ @@ -1176,7 +1176,7 @@ public: virtual DrawListID draw_list_switch_to_next_pass(); virtual Error draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids); - virtual void draw_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual void draw_list_end(BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); /***********************/ /**** COMPUTE LISTS ****/ @@ -1191,9 +1191,9 @@ public: virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads); virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); - virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual void compute_list_end(BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); - virtual void barrier(uint32_t p_from = BARRIER_MASK_ALL, uint32_t p_to = BARRIER_MASK_ALL); + virtual void barrier(BitField<BarrierMask> p_from = BARRIER_MASK_ALL_BARRIERS, BitField<BarrierMask> p_to = BARRIER_MASK_ALL_BARRIERS); virtual void full_barrier(); /**************/ diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 381df6d65e..028c7dca6f 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -68,7 +68,7 @@ Vector<VkAttachmentReference> VulkanContext::_convert_VkAttachmentReference2(uin } VkResult VulkanContext::vkCreateRenderPass2KHR(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass) { - if (has_renderpass2_ext) { + if (is_device_extension_enabled(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) { if (fpCreateRenderPass2KHR == nullptr) { fpCreateRenderPass2KHR = (PFN_vkCreateRenderPass2KHR)vkGetDeviceProcAddr(p_device, "vkCreateRenderPass2KHR"); } @@ -400,16 +400,28 @@ Error VulkanContext::_obtain_vulkan_version() { return OK; } -Error VulkanContext::_initialize_extensions() { - uint32_t instance_extension_count = 0; +bool VulkanContext::instance_extensions_initialized = false; +HashMap<CharString, bool> VulkanContext::requested_instance_extensions; + +void VulkanContext::register_requested_instance_extension(const CharString &extension_name, bool p_required) { + ERR_FAIL_COND_MSG(instance_extensions_initialized, "You can only registered extensions before the Vulkan instance is created"); + ERR_FAIL_COND(requested_instance_extensions.has(extension_name)); + + requested_instance_extensions[extension_name] = p_required; +} + +Error VulkanContext::_initialize_instance_extensions() { + enabled_instance_extension_names.clear(); - enabled_extension_count = 0; - enabled_debug_utils = false; - enabled_debug_report = false; - // Look for instance extensions. - VkBool32 surfaceExtFound = 0; - VkBool32 platformSurfaceExtFound = 0; - memset(extension_names, 0, sizeof(extension_names)); + // Make sure our core extensions are here + register_requested_instance_extension(VK_KHR_SURFACE_EXTENSION_NAME, true); + register_requested_instance_extension(_get_platform_surface_extension(), true); + + if (_use_validation_layers()) { + register_requested_instance_extension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, false); + } + + register_requested_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false); // Only enable debug utils in verbose mode or DEV_ENABLED. // End users would get spammed with messages of varying verbosity due to the @@ -420,54 +432,141 @@ Error VulkanContext::_initialize_extensions() { #else bool want_debug_utils = OS::get_singleton()->is_stdout_verbose(); #endif + if (want_debug_utils) { + register_requested_instance_extension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false); + } + // Load instance extensions that are available... + uint32_t instance_extension_count = 0; VkResult err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, nullptr); ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_INCOMPLETE, ERR_CANT_CREATE); + ERR_FAIL_COND_V_MSG(instance_extension_count == 0, ERR_CANT_CREATE, "No instance extensions found, is a driver installed?"); - if (instance_extension_count > 0) { - VkExtensionProperties *instance_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * instance_extension_count); - err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, instance_extensions); - if (err != VK_SUCCESS && err != VK_INCOMPLETE) { - free(instance_extensions); - ERR_FAIL_V(ERR_CANT_CREATE); + VkExtensionProperties *instance_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * instance_extension_count); + err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, instance_extensions); + if (err != VK_SUCCESS && err != VK_INCOMPLETE) { + free(instance_extensions); + ERR_FAIL_V(ERR_CANT_CREATE); + } +#ifdef DEV_ENABLED + for (uint32_t i = 0; i < instance_extension_count; i++) { + print_verbose(String("VULKAN: Found instance extension ") + String(instance_extensions[i].extensionName)); + } +#endif + + // Enable all extensions that are supported and requested + for (uint32_t i = 0; i < instance_extension_count; i++) { + CharString extension_name(instance_extensions[i].extensionName); + if (requested_instance_extensions.has(extension_name)) { + enabled_instance_extension_names.insert(extension_name); } - for (uint32_t i = 0; i < instance_extension_count; i++) { - if (!strcmp(VK_KHR_SURFACE_EXTENSION_NAME, instance_extensions[i].extensionName)) { - surfaceExtFound = 1; - extension_names[enabled_extension_count++] = VK_KHR_SURFACE_EXTENSION_NAME; - } + } - if (!strcmp(_get_platform_surface_extension(), instance_extensions[i].extensionName)) { - platformSurfaceExtFound = 1; - extension_names[enabled_extension_count++] = _get_platform_surface_extension(); - } - if (!strcmp(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, instance_extensions[i].extensionName)) { - if (_use_validation_layers()) { - extension_names[enabled_extension_count++] = VK_EXT_DEBUG_REPORT_EXTENSION_NAME; - enabled_debug_report = true; - } - } - if (!strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, instance_extensions[i].extensionName)) { - if (want_debug_utils) { - extension_names[enabled_extension_count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; - enabled_debug_utils = true; - } - } - if (!strcmp(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, instance_extensions[i].extensionName)) { - extension_names[enabled_extension_count++] = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { + // Now check our requested extensions + for (KeyValue<CharString, bool> &requested_extension : requested_instance_extensions) { + if (!enabled_instance_extension_names.has(requested_extension.key)) { + if (requested_extension.value) { free(instance_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); + ERR_FAIL_V_MSG(ERR_BUG, String("Required extension ") + String(requested_extension.key) + String(" not found, is a driver installed?")); + } else { + print_verbose(String("Optional extension ") + String(requested_extension.key) + String(" not found")); } } + } - free(instance_extensions); + free(instance_extensions); + + instance_extensions_initialized = true; + return OK; +} + +bool VulkanContext::device_extensions_initialized = false; +HashMap<CharString, bool> VulkanContext::requested_device_extensions; + +void VulkanContext::register_requested_device_extension(const CharString &extension_name, bool p_required) { + ERR_FAIL_COND_MSG(device_extensions_initialized, "You can only registered extensions before the Vulkan instance is created"); + ERR_FAIL_COND(requested_device_extensions.has(extension_name)); + + requested_device_extensions[extension_name] = p_required; +} + +Error VulkanContext::_initialize_device_extensions() { + // Look for device extensions. + enabled_device_extension_names.clear(); + + // Make sure our core extensions are here + register_requested_device_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); + + register_requested_device_extension(VK_KHR_MULTIVIEW_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_16BIT_STORAGE_EXTENSION_NAME, false); + + // TODO consider the following extensions: + // - VK_KHR_spirv_1_4 + // - VK_KHR_swapchain_mutable_format + // - VK_EXT_full_screen_exclusive + // - VK_EXT_hdr_metadata + // - VK_KHR_depth_stencil_resolve + + // Even though the user "enabled" the extension via the command + // line, we must make sure that it's enumerated for use with the + // device. Therefore, disable it here, and re-enable it again if + // enumerated. + if (VK_KHR_incremental_present_enabled) { + register_requested_device_extension(VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, false); + } + if (VK_GOOGLE_display_timing_enabled) { + register_requested_device_extension(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME, false); + } + + // obtain available device extensions + uint32_t device_extension_count = 0; + VkResult err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); + ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + ERR_FAIL_COND_V_MSG(device_extension_count == 0, ERR_CANT_CREATE, + "vkEnumerateDeviceExtensionProperties failed to find any extensions\n\n" + "Do you have a compatible Vulkan installable client driver (ICD) installed?\n" + "vkCreateInstance Failure"); + + VkExtensionProperties *device_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * device_extension_count); + err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, device_extensions); + if (err) { + free(device_extensions); + ERR_FAIL_V(ERR_CANT_CREATE); + } + +#ifdef DEV_ENABLED + for (uint32_t i = 0; i < device_extension_count; i++) { + print_verbose(String("VULKAN: Found device extension ") + String(device_extensions[i].extensionName)); + } +#endif + + // Enable all extensions that are supported and requested + for (uint32_t i = 0; i < device_extension_count; i++) { + CharString extension_name(device_extensions[i].extensionName); + if (requested_device_extensions.has(extension_name)) { + enabled_device_extension_names.insert(extension_name); + } + } + + // Now check our requested extensions + for (KeyValue<CharString, bool> &requested_extension : requested_device_extensions) { + if (!enabled_device_extension_names.has(requested_extension.key)) { + if (requested_extension.value) { + free(device_extensions); + ERR_FAIL_V_MSG(ERR_BUG, + String("vkEnumerateDeviceExtensionProperties failed to find the ") + String(requested_extension.key) + String(" extension.\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?\nvkCreateInstance Failure")); + } else { + print_verbose(String("Optional extension ") + String(requested_extension.key) + String(" not found")); + } + } } - ERR_FAIL_COND_V_MSG(!surfaceExtFound, ERR_CANT_CREATE, "No surface extension found, is a driver installed?"); - ERR_FAIL_COND_V_MSG(!platformSurfaceExtFound, ERR_CANT_CREATE, "No platform surface extension found, is a driver installed?"); + free(device_extensions); + device_extensions_initialized = true; return OK; } @@ -644,184 +743,176 @@ Error VulkanContext::_check_capabilities() { storage_buffer_capabilities.storage_push_constant_16_is_supported = false; storage_buffer_capabilities.storage_input_output_16 = false; - // Check for extended features. - PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2"); - if (vkGetPhysicalDeviceFeatures2_func == nullptr) { - // In Vulkan 1.0 might be accessible under its original extension name. - vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2KHR"); - } - if (vkGetPhysicalDeviceFeatures2_func != nullptr) { - // Check our extended features. - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, - /*pNext*/ nullptr, - /*pipelineFragmentShadingRate*/ false, - /*primitiveFragmentShadingRate*/ false, - /*attachmentFragmentShadingRate*/ false, - }; + if (is_instance_extension_enabled(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME)) { + // Check for extended features. + PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2"); + if (vkGetPhysicalDeviceFeatures2_func == nullptr) { + // In Vulkan 1.0 might be accessible under its original extension name. + vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2KHR"); + } + if (vkGetPhysicalDeviceFeatures2_func != nullptr) { + // Check our extended features. + VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, + /*pNext*/ nullptr, + /*pipelineFragmentShadingRate*/ false, + /*primitiveFragmentShadingRate*/ false, + /*attachmentFragmentShadingRate*/ false, + }; - VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, - /*pNext*/ &vrs_features, - /*shaderFloat16*/ false, - /*shaderInt8*/ false, - }; + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, + /*pNext*/ &vrs_features, + /*shaderFloat16*/ false, + /*shaderInt8*/ false, + }; - VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, - /*pNext*/ &shader_features, - /*storageBuffer16BitAccess*/ false, - /*uniformAndStorageBuffer16BitAccess*/ false, - /*storagePushConstant16*/ false, - /*storageInputOutput16*/ false, - }; + VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, + /*pNext*/ &shader_features, + /*storageBuffer16BitAccess*/ false, + /*uniformAndStorageBuffer16BitAccess*/ false, + /*storagePushConstant16*/ false, + /*storageInputOutput16*/ false, + }; - VkPhysicalDeviceMultiviewFeatures multiview_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, - /*pNext*/ &storage_feature, - /*multiview*/ false, - /*multiviewGeometryShader*/ false, - /*multiviewTessellationShader*/ false, - }; + VkPhysicalDeviceMultiviewFeatures multiview_features = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, + /*pNext*/ &storage_feature, + /*multiview*/ false, + /*multiviewGeometryShader*/ false, + /*multiviewTessellationShader*/ false, + }; - VkPhysicalDeviceFeatures2 device_features; - device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - device_features.pNext = &multiview_features; + VkPhysicalDeviceFeatures2 device_features; + device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + device_features.pNext = &multiview_features; - vkGetPhysicalDeviceFeatures2_func(gpu, &device_features); + vkGetPhysicalDeviceFeatures2_func(gpu, &device_features); - // We must check that the relative extension is present before assuming a - // feature as enabled. Actually, according to the spec we shouldn't add the - // structs in pNext at all, but this works fine. - // See also: https://github.com/godotengine/godot/issues/65409 - for (uint32_t i = 0; i < enabled_extension_count; ++i) { - if (!strcmp(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, extension_names[i])) { + // We must check that the relative extension is present before assuming a + // feature as enabled. Actually, according to the spec we shouldn't add the + // structs in pNext at all, but this works fine. + // See also: https://github.com/godotengine/godot/issues/65409 + if (is_device_extension_enabled(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { vrs_capabilities.pipeline_vrs_supported = vrs_features.pipelineFragmentShadingRate; vrs_capabilities.primitive_vrs_supported = vrs_features.primitiveFragmentShadingRate; vrs_capabilities.attachment_vrs_supported = vrs_features.attachmentFragmentShadingRate; - - continue; } - if (!strcmp(VK_KHR_MULTIVIEW_EXTENSION_NAME, extension_names[i])) { + if (is_device_extension_enabled(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { multiview_capabilities.is_supported = multiview_features.multiview; multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader; multiview_capabilities.tessellation_shader_is_supported = multiview_features.multiviewTessellationShader; - - continue; } - if (!strcmp(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, extension_names[i])) { + if (is_device_extension_enabled(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { shader_capabilities.shader_float16_is_supported = shader_features.shaderFloat16; shader_capabilities.shader_int8_is_supported = shader_features.shaderInt8; - - continue; } - if (!strcmp(VK_KHR_16BIT_STORAGE_EXTENSION_NAME, extension_names[i])) { + if (is_device_extension_enabled(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = storage_feature.storageBuffer16BitAccess; storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported = storage_feature.uniformAndStorageBuffer16BitAccess; storage_buffer_capabilities.storage_push_constant_16_is_supported = storage_feature.storagePushConstant16; storage_buffer_capabilities.storage_input_output_16 = storage_feature.storageInputOutput16; - - continue; } } - } - // Check extended properties. - PFN_vkGetPhysicalDeviceProperties2 device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2"); - if (device_properties_func == nullptr) { - // In Vulkan 1.0 might be accessible under its original extension name. - device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2KHR"); - } - if (device_properties_func != nullptr) { - VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrsProperties{}; - VkPhysicalDeviceMultiviewProperties multiviewProperties{}; - VkPhysicalDeviceSubgroupProperties subgroupProperties{}; - VkPhysicalDeviceProperties2 physicalDeviceProperties{}; - void *nextptr = nullptr; - - if (!(vulkan_major == 1 && vulkan_minor == 0)) { - subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; - subgroupProperties.pNext = nextptr; - - nextptr = &subgroupProperties; + // Check extended properties. + PFN_vkGetPhysicalDeviceProperties2 device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2"); + if (device_properties_func == nullptr) { + // In Vulkan 1.0 might be accessible under its original extension name. + device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2KHR"); } + if (device_properties_func != nullptr) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrsProperties{}; + VkPhysicalDeviceMultiviewProperties multiviewProperties{}; + VkPhysicalDeviceSubgroupProperties subgroupProperties{}; + VkPhysicalDeviceProperties2 physicalDeviceProperties{}; + void *nextptr = nullptr; + + if (!(vulkan_major == 1 && vulkan_minor == 0)) { + subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + subgroupProperties.pNext = nextptr; + + nextptr = &subgroupProperties; + } - if (multiview_capabilities.is_supported) { - multiviewProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES; - multiviewProperties.pNext = nextptr; + if (multiview_capabilities.is_supported) { + multiviewProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES; + multiviewProperties.pNext = nextptr; - nextptr = &multiviewProperties; - } - - if (vrs_capabilities.attachment_vrs_supported) { - vrsProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; - vrsProperties.pNext = nextptr; + nextptr = &multiviewProperties; + } - nextptr = &vrsProperties; - } + if (vrs_capabilities.attachment_vrs_supported) { + vrsProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + vrsProperties.pNext = nextptr; - physicalDeviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - physicalDeviceProperties.pNext = nextptr; + nextptr = &vrsProperties; + } - device_properties_func(gpu, &physicalDeviceProperties); + physicalDeviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physicalDeviceProperties.pNext = nextptr; - subgroup_capabilities.size = subgroupProperties.subgroupSize; - subgroup_capabilities.supportedStages = subgroupProperties.supportedStages; - subgroup_capabilities.supportedOperations = subgroupProperties.supportedOperations; - // Note: quadOperationsInAllStages will be true if: - // - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT. - // - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT. - subgroup_capabilities.quadOperationsInAllStages = subgroupProperties.quadOperationsInAllStages; + device_properties_func(gpu, &physicalDeviceProperties); - if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { - print_verbose("- Vulkan Variable Rate Shading supported:"); - if (vrs_capabilities.pipeline_vrs_supported) { - print_verbose(" Pipeline fragment shading rate"); - } - if (vrs_capabilities.primitive_vrs_supported) { - print_verbose(" Primitive fragment shading rate"); - } - if (vrs_capabilities.attachment_vrs_supported) { - // TODO expose these somehow to the end user. - vrs_capabilities.min_texel_size.x = vrsProperties.minFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.min_texel_size.y = vrsProperties.minFragmentShadingRateAttachmentTexelSize.height; - vrs_capabilities.max_texel_size.x = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.max_texel_size.y = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.height; + subgroup_capabilities.size = subgroupProperties.subgroupSize; + subgroup_capabilities.supportedStages = subgroupProperties.supportedStages; + subgroup_capabilities.supportedOperations = subgroupProperties.supportedOperations; + // Note: quadOperationsInAllStages will be true if: + // - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT. + // - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT. + subgroup_capabilities.quadOperationsInAllStages = subgroupProperties.quadOperationsInAllStages; - // We'll attempt to default to a texel size of 16x16 - vrs_capabilities.texel_size.x = CLAMP(16, vrs_capabilities.min_texel_size.x, vrs_capabilities.max_texel_size.x); - vrs_capabilities.texel_size.y = CLAMP(16, vrs_capabilities.min_texel_size.y, vrs_capabilities.max_texel_size.y); + if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { + print_verbose("- Vulkan Variable Rate Shading supported:"); + if (vrs_capabilities.pipeline_vrs_supported) { + print_verbose(" Pipeline fragment shading rate"); + } + if (vrs_capabilities.primitive_vrs_supported) { + print_verbose(" Primitive fragment shading rate"); + } + if (vrs_capabilities.attachment_vrs_supported) { + // TODO expose these somehow to the end user. + vrs_capabilities.min_texel_size.x = vrsProperties.minFragmentShadingRateAttachmentTexelSize.width; + vrs_capabilities.min_texel_size.y = vrsProperties.minFragmentShadingRateAttachmentTexelSize.height; + vrs_capabilities.max_texel_size.x = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.width; + vrs_capabilities.max_texel_size.y = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.height; + + // We'll attempt to default to a texel size of 16x16 + vrs_capabilities.texel_size.x = CLAMP(16, vrs_capabilities.min_texel_size.x, vrs_capabilities.max_texel_size.x); + vrs_capabilities.texel_size.y = CLAMP(16, vrs_capabilities.min_texel_size.y, vrs_capabilities.max_texel_size.y); + + print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")")); + } - print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")")); + } else { + print_verbose("- Vulkan Variable Rate Shading not supported"); } - } else { - print_verbose("- Vulkan Variable Rate Shading not supported"); - } + if (multiview_capabilities.is_supported) { + multiview_capabilities.max_view_count = multiviewProperties.maxMultiviewViewCount; + multiview_capabilities.max_instance_count = multiviewProperties.maxMultiviewInstanceIndex; - if (multiview_capabilities.is_supported) { - multiview_capabilities.max_view_count = multiviewProperties.maxMultiviewViewCount; - multiview_capabilities.max_instance_count = multiviewProperties.maxMultiviewInstanceIndex; + print_verbose("- Vulkan multiview supported:"); + print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); + print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); + } else { + print_verbose("- Vulkan multiview not supported"); + } - print_verbose("- Vulkan multiview supported:"); - print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); - print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); + print_verbose("- Vulkan subgroup:"); + print_verbose(" size: " + itos(subgroup_capabilities.size)); + print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc()); + print_verbose(" supported ops: " + subgroup_capabilities.supported_operations_desc()); + if (subgroup_capabilities.quadOperationsInAllStages) { + print_verbose(" quad operations in all stages"); + } } else { - print_verbose("- Vulkan multiview not supported"); - } - - print_verbose("- Vulkan subgroup:"); - print_verbose(" size: " + itos(subgroup_capabilities.size)); - print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc()); - print_verbose(" supported ops: " + subgroup_capabilities.supported_operations_desc()); - if (subgroup_capabilities.quadOperationsInAllStages) { - print_verbose(" quad operations in all stages"); + print_verbose("- Couldn't call vkGetPhysicalDeviceProperties2"); } - } else { - print_verbose("- Couldn't call vkGetPhysicalDeviceProperties2"); } return OK; @@ -833,12 +924,19 @@ Error VulkanContext::_create_instance() { // Initialize extensions. { - Error err = _initialize_extensions(); + Error err = _initialize_instance_extensions(); if (err != OK) { return err; } } + int enabled_extension_count = 0; + const char *enabled_extension_names[MAX_EXTENSIONS]; + ERR_FAIL_COND_V(enabled_instance_extension_names.size() > MAX_EXTENSIONS, ERR_CANT_CREATE); + for (const CharString &extension_name : enabled_instance_extension_names) { + enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); + } + CharString cs = GLOBAL_GET("application/config/name").operator String().utf8(); const VkApplicationInfo app = { /*sType*/ VK_STRUCTURE_TYPE_APPLICATION_INFO, @@ -853,7 +951,7 @@ Error VulkanContext::_create_instance() { inst_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; inst_info.pApplicationInfo = &app; inst_info.enabledExtensionCount = enabled_extension_count; - inst_info.ppEnabledExtensionNames = (const char *const *)extension_names; + inst_info.ppEnabledExtensionNames = (const char *const *)enabled_extension_names; if (_use_validation_layers()) { _get_preferred_validation_layers(&inst_info.enabledLayerCount, &inst_info.ppEnabledLayerNames); } @@ -863,9 +961,9 @@ Error VulkanContext::_create_instance() { * After the instance is created, we use the instance-based * function to register the final callback. */ - VkDebugUtilsMessengerCreateInfoEXT dbg_messenger_create_info; - VkDebugReportCallbackCreateInfoEXT dbg_report_callback_create_info{}; - if (enabled_debug_utils) { + VkDebugUtilsMessengerCreateInfoEXT dbg_messenger_create_info = {}; + VkDebugReportCallbackCreateInfoEXT dbg_report_callback_create_info = {}; + if (is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { // VK_EXT_debug_utils style. dbg_messenger_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; dbg_messenger_create_info.pNext = nullptr; @@ -878,7 +976,7 @@ Error VulkanContext::_create_instance() { dbg_messenger_create_info.pfnUserCallback = _debug_messenger_callback; dbg_messenger_create_info.pUserData = this; inst_info.pNext = &dbg_messenger_create_info; - } else if (enabled_debug_report) { + } else if (is_instance_extension_enabled(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { dbg_report_callback_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; dbg_report_callback_create_info.flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | @@ -918,7 +1016,7 @@ Error VulkanContext::_create_instance() { volkLoadInstance(inst); #endif - if (enabled_debug_utils) { + if (is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { // Setup VK_EXT_debug_utils function pointers always (we use them for debug labels and names). CreateDebugUtilsMessengerEXT = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(inst, "vkCreateDebugUtilsMessengerEXT"); @@ -959,7 +1057,7 @@ Error VulkanContext::_create_instance() { ERR_FAIL_V(ERR_CANT_CREATE); break; } - } else if (enabled_debug_report) { + } else if (is_instance_extension_enabled(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { CreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkCreateDebugReportCallbackEXT"); DebugReportMessageEXT = (PFN_vkDebugReportMessageEXT)vkGetInstanceProcAddr(inst, "vkDebugReportMessageEXT"); DestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkDestroyDebugReportCallbackEXT"); @@ -1140,12 +1238,6 @@ Error VulkanContext::_create_physical_device(VkSurfaceKHR p_surface) { free(physical_devices); - // Look for device extensions. - uint32_t device_extension_count = 0; - VkBool32 swapchainExtFound = 0; - enabled_extension_count = 0; - memset(extension_names, 0, sizeof(extension_names)); - // Get identifier properties. vkGetPhysicalDeviceProperties(gpu, &gpu_props); @@ -1171,84 +1263,13 @@ Error VulkanContext::_create_physical_device(VkSurfaceKHR p_surface) { device_api_version = gpu_props.apiVersion; - err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - if (device_extension_count > 0) { - VkExtensionProperties *device_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * device_extension_count); - err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, device_extensions); - if (err) { - free(device_extensions); - ERR_FAIL_V(ERR_CANT_CREATE); - } - - for (uint32_t i = 0; i < device_extension_count; i++) { - if (!strcmp(VK_KHR_SWAPCHAIN_EXTENSION_NAME, device_extensions[i].extensionName)) { - swapchainExtFound = 1; - extension_names[enabled_extension_count++] = VK_KHR_SWAPCHAIN_EXTENSION_NAME; - } - if (!strcmp(VK_KHR_MULTIVIEW_EXTENSION_NAME, device_extensions[i].extensionName)) { - // If multiview is supported, enable it. - extension_names[enabled_extension_count++] = VK_KHR_MULTIVIEW_EXTENSION_NAME; - } - if (!strcmp(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, device_extensions[i].extensionName)) { - // if shading rate image is supported, enable it - extension_names[enabled_extension_count++] = VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME; - } - if (!strcmp(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, device_extensions[i].extensionName)) { - has_renderpass2_ext = true; - extension_names[enabled_extension_count++] = VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { - free(device_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); - } - } - - if (VK_KHR_incremental_present_enabled) { - // Even though the user "enabled" the extension via the command - // line, we must make sure that it's enumerated for use with the - // device. Therefore, disable it here, and re-enable it again if - // enumerated. - VK_KHR_incremental_present_enabled = false; - for (uint32_t i = 0; i < device_extension_count; i++) { - if (!strcmp(VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, device_extensions[i].extensionName)) { - extension_names[enabled_extension_count++] = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME; - VK_KHR_incremental_present_enabled = true; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { - free(device_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); - } - } - } - - if (VK_GOOGLE_display_timing_enabled) { - // Even though the user "enabled" the extension via the command - // line, we must make sure that it's enumerated for use with the - // device. Therefore, disable it here, and re-enable it again if - // enumerated. - VK_GOOGLE_display_timing_enabled = false; - for (uint32_t i = 0; i < device_extension_count; i++) { - if (!strcmp(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME, device_extensions[i].extensionName)) { - extension_names[enabled_extension_count++] = VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME; - VK_GOOGLE_display_timing_enabled = true; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { - free(device_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); - } - } + { + Error _err = _initialize_device_extensions(); + if (_err != OK) { + return _err; } - - free(device_extensions); } - ERR_FAIL_COND_V_MSG(!swapchainExtFound, ERR_CANT_CREATE, - "vkEnumerateDeviceExtensionProperties failed to find the " VK_KHR_SWAPCHAIN_EXTENSION_NAME - " extension.\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?\n" - "vkCreateInstance Failure"); - // Call with nullptr data to get count. vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr); ERR_FAIL_COND_V(queue_family_count == 0, ERR_CANT_CREATE); @@ -1309,7 +1330,7 @@ Error VulkanContext::_create_device() { }; nextptr = &shader_features; - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features; + VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { // Insert into our chain to enable these features if they are available. vrs_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; @@ -1321,9 +1342,9 @@ Error VulkanContext::_create_device() { nextptr = &vrs_features; } - VkPhysicalDeviceVulkan11Features vulkan11features; - VkPhysicalDevice16BitStorageFeaturesKHR storage_feature; - VkPhysicalDeviceMultiviewFeatures multiview_features; + VkPhysicalDeviceVulkan11Features vulkan11features = {}; + VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; + VkPhysicalDeviceMultiviewFeatures multiview_features = {}; if (vulkan_major > 1 || vulkan_minor >= 2) { // In Vulkan 1.2 and newer we use a newer struct to enable various features. @@ -1362,6 +1383,13 @@ Error VulkanContext::_create_device() { } } + uint32_t enabled_extension_count = 0; + const char *enabled_extension_names[MAX_EXTENSIONS]; + ERR_FAIL_COND_V(enabled_device_extension_names.size() > MAX_EXTENSIONS, ERR_CANT_CREATE); + for (const CharString &extension_name : enabled_device_extension_names) { + enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); + } + VkDeviceCreateInfo sdevice = { /*sType*/ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, /*pNext*/ nextptr, @@ -1371,7 +1399,7 @@ Error VulkanContext::_create_device() { /*enabledLayerCount*/ 0, /*ppEnabledLayerNames*/ nullptr, /*enabledExtensionCount*/ enabled_extension_count, - /*ppEnabledExtensionNames*/ (const char *const *)extension_names, + /*ppEnabledExtensionNames*/ (const char *const *)enabled_extension_names, /*pEnabledFeatures*/ &physical_device_features, // If specific features are required, pass them in here. }; if (separate_present_queue) { @@ -1459,7 +1487,7 @@ Error VulkanContext::_initialize_queues(VkSurfaceKHR p_surface) { GET_DEVICE_PROC_ADDR(device, GetSwapchainImagesKHR); GET_DEVICE_PROC_ADDR(device, AcquireNextImageKHR); GET_DEVICE_PROC_ADDR(device, QueuePresentKHR); - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { GET_DEVICE_PROC_ADDR(device, GetRefreshCycleDurationGOOGLE); GET_DEVICE_PROC_ADDR(device, GetPastPresentationTimingGOOGLE); } @@ -2214,7 +2242,7 @@ Error VulkanContext::swap_buffers() { VkResult err; #if 0 - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { // Look at what happened to previous presents, and make appropriate // adjustments in timing. DemoUpdateTargetIPD(demo); @@ -2335,7 +2363,7 @@ Error VulkanContext::swap_buffers() { } #if 0 - if (VK_KHR_incremental_present_enabled) { + if (is_device_extension_enabled(VK_KHR_incremental_present_enabled)) { // If using VK_KHR_incremental_present, we provide a hint of the region // that contains changed content relative to the previously-presented // image. The implementation can use this hint in order to save @@ -2366,7 +2394,7 @@ Error VulkanContext::swap_buffers() { #endif #if 0 - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { VkPresentTimeGOOGLE ptime; if (prev_desired_present_time == 0) { // This must be the first present for this swapchain. @@ -2396,7 +2424,7 @@ Error VulkanContext::swap_buffers() { /*swapchainCount*/ present.swapchainCount, /*pTimes*/ &ptime, }; - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { present.pNext = &present_time; } } @@ -2469,6 +2497,13 @@ RID VulkanContext::local_device_create() { queues[0].pQueuePriorities = queue_priorities; queues[0].flags = 0; + uint32_t enabled_extension_count = 0; + const char *enabled_extension_names[MAX_EXTENSIONS]; + ERR_FAIL_COND_V(enabled_device_extension_names.size() > MAX_EXTENSIONS, RID()); + for (const CharString &extension_name : enabled_device_extension_names) { + enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); + } + VkDeviceCreateInfo sdevice = { /*sType =*/VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, /*pNext */ nullptr, @@ -2478,7 +2513,7 @@ RID VulkanContext::local_device_create() { /*enabledLayerCount */ 0, /*ppEnabledLayerNames */ nullptr, /*enabledExtensionCount */ enabled_extension_count, - /*ppEnabledExtensionNames */ (const char *const *)extension_names, + /*ppEnabledExtensionNames */ (const char *const *)enabled_extension_names, /*pEnabledFeatures */ &physical_device_features, // If specific features are required, pass them in here. }; err = vkCreateDevice(gpu, &sdevice, nullptr, &ld.device); @@ -2543,7 +2578,7 @@ void VulkanContext::local_device_free(RID p_local_device) { } void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } @@ -2560,7 +2595,7 @@ void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String } void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } CharString cs = p_label_name.utf8(); @@ -2576,14 +2611,14 @@ void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, Strin } void VulkanContext::command_end_label(VkCommandBuffer p_command_buffer) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } CmdEndDebugUtilsLabelEXT(p_command_buffer); } void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } CharString obj_data = p_object_name.utf8(); @@ -2644,7 +2679,7 @@ VulkanContext::~VulkanContext() { vkDestroySemaphore(device, image_ownership_semaphores[i], nullptr); } } - if (inst_initialized && enabled_debug_utils) { + if (inst_initialized && is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { DestroyDebugUtilsMessengerEXT(inst, dbg_messenger, nullptr); } if (inst_initialized && dbg_debug_report != VK_NULL_HANDLE) { diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index 8cf33fa463..0d49f5fe9f 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -34,6 +34,7 @@ #include "core/error/error_list.h" #include "core/os/mutex.h" #include "core/string/ustring.h" +#include "core/templates/hash_map.h" #include "core/templates/rb_map.h" #include "core/templates/rid_owner.h" #include "servers/display_server.h" @@ -184,19 +185,15 @@ private: int command_buffer_count = 1; // Extensions. + static bool instance_extensions_initialized; + static HashMap<CharString, bool> requested_instance_extensions; + HashSet<CharString> enabled_instance_extension_names; + static bool device_extensions_initialized; + static HashMap<CharString, bool> requested_device_extensions; + HashSet<CharString> enabled_device_extension_names; bool VK_KHR_incremental_present_enabled = true; bool VK_GOOGLE_display_timing_enabled = true; - uint32_t enabled_extension_count = 0; - const char *extension_names[MAX_EXTENSIONS]; - bool enabled_debug_utils = false; - bool has_renderpass2_ext = false; - - /** - * True if VK_EXT_debug_report extension is used. VK_EXT_debug_report is deprecated but it is - * still used if VK_EXT_debug_utils is not available. - */ - bool enabled_debug_report = false; PFN_vkCreateDebugUtilsMessengerEXT CreateDebugUtilsMessengerEXT = nullptr; PFN_vkDestroyDebugUtilsMessengerEXT DestroyDebugUtilsMessengerEXT = nullptr; @@ -225,7 +222,8 @@ private: VkDebugReportCallbackEXT dbg_debug_report = VK_NULL_HANDLE; Error _obtain_vulkan_version(); - Error _initialize_extensions(); + Error _initialize_instance_extensions(); + Error _initialize_device_extensions(); Error _check_capabilities(); VkBool32 _check_layers(uint32_t check_count, const char *const *check_names, uint32_t layer_count, VkLayerProperties *layers); @@ -275,7 +273,7 @@ protected: public: // Extension calls. - bool supports_renderpass2() const { return has_renderpass2_ext; } + bool supports_renderpass2() const { return is_device_extension_enabled(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME); } VkResult vkCreateRenderPass2KHR(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass); uint32_t get_vulkan_major() const { return vulkan_major; }; @@ -295,6 +293,16 @@ public: static void set_vulkan_hooks(VulkanHooks *p_vulkan_hooks) { vulkan_hooks = p_vulkan_hooks; }; + static void register_requested_instance_extension(const CharString &extension_name, bool p_required); + bool is_instance_extension_enabled(const CharString &extension_name) const { + return enabled_instance_extension_names.has(extension_name); + } + + static void register_requested_device_extension(const CharString &extension_name, bool p_required); + bool is_device_extension_enabled(const CharString &extension_name) const { + return enabled_device_extension_names.has(extension_name); + } + void window_resize(DisplayServer::WindowID p_window_id, int p_width, int p_height); int window_get_width(DisplayServer::WindowID p_window = 0); int window_get_height(DisplayServer::WindowID p_window = 0); |