diff options
Diffstat (limited to 'drivers/gles3')
-rw-r--r-- | drivers/gles3/rasterizer_canvas_gles3.cpp | 178 | ||||
-rw-r--r-- | drivers/gles3/rasterizer_canvas_gles3.h | 5 | ||||
-rw-r--r-- | drivers/gles3/shaders/canvas.glsl | 219 | ||||
-rw-r--r-- | drivers/gles3/shaders/canvas_uniforms_inc.glsl | 36 | ||||
-rw-r--r-- | drivers/gles3/storage/config.cpp | 2 | ||||
-rw-r--r-- | drivers/gles3/storage/config.h | 2 |
6 files changed, 235 insertions, 207 deletions
diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 7b39641029..be18b008be 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -568,9 +568,8 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou _new_batch(batch_broken, index); // Override the start position and index as we want to start from where we finished off last time. - state.canvas_instance_batches[state.current_batch_index].start = r_last_index * sizeof(InstanceData); + state.canvas_instance_batches[state.current_batch_index].start = r_last_index; index = 0; - _align_instance_data_buffer(index); for (int i = 0; i < p_item_count; i++) { Item *ci = items[i]; @@ -630,14 +629,14 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou } // Copy over all data needed for rendering. - glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].ubo); + glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer); #ifdef WEB_ENABLED - glBufferSubData(GL_UNIFORM_BUFFER, r_last_index * sizeof(InstanceData), sizeof(InstanceData) * index, state.instance_data_array); + glBufferSubData(GL_ARRAY_BUFFER, r_last_index * sizeof(InstanceData), sizeof(InstanceData) * index, state.instance_data_array); #else // On Desktop and mobile we map the memory without synchronizing for maximum speed. - void *ubo = glMapBufferRange(GL_UNIFORM_BUFFER, r_last_index * sizeof(InstanceData), index * sizeof(InstanceData), GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); - memcpy(ubo, state.instance_data_array, index * sizeof(InstanceData)); - glUnmapBuffer(GL_UNIFORM_BUFFER); + void *buffer = glMapBufferRange(GL_ARRAY_BUFFER, r_last_index * sizeof(InstanceData), index * sizeof(InstanceData), GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + memcpy(buffer, state.instance_data_array, index * sizeof(InstanceData)); + glUnmapBuffer(GL_ARRAY_BUFFER); #endif glDisable(GL_SCISSOR_TEST); @@ -664,7 +663,17 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou uint64_t specialization = 0; specialization |= uint64_t(state.canvas_instance_batches[i].lights_disabled); specialization |= uint64_t(!GLES3::Config::get_singleton()->float_texture_supported) << 1; - bool success = _bind_material(material_data, variant, specialization); + RID shader_version = data.canvas_shader_default_version; + + if (material_data) { + if (material_data->shader_data->version.is_valid() && material_data->shader_data->valid) { + // Bind uniform buffer and textures + material_data->bind_uniforms(); + shader_version = material_data->shader_data->version; + } + } + + bool success = GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(shader_version, variant, specialization); if (!success) { continue; } @@ -1217,26 +1226,15 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { _bind_canvas_texture(state.canvas_instance_batches[p_index].tex, state.canvas_instance_batches[p_index].filter, state.canvas_instance_batches[p_index].repeat); - // Bind the region of the UBO used by this batch. - // If region exceeds the boundary of the UBO, just ignore. - uint32_t range_bytes = data.max_instances_per_batch * sizeof(InstanceData); - if (state.canvas_instance_batches[p_index].start >= (data.max_instances_per_ubo - 1) * sizeof(InstanceData)) { - return; - } else if (state.canvas_instance_batches[p_index].start >= (data.max_instances_per_ubo - data.max_instances_per_batch) * sizeof(InstanceData)) { - // If we have less than a full batch at the end, we can just draw it anyway. - // OpenGL will complain about the UBO being smaller than expected, but it should render fine. - range_bytes = (data.max_instances_per_ubo - 1) * sizeof(InstanceData) - state.canvas_instance_batches[p_index].start; - } - - uint32_t range_start = state.canvas_instance_batches[p_index].start; - glBindBufferRange(GL_UNIFORM_BUFFER, INSTANCE_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].ubo, range_start, range_bytes); - switch (state.canvas_instance_batches[p_index].command_type) { case Item::Command::TYPE_RECT: case Item::Command::TYPE_NINEPATCH: { glBindVertexArray(data.indexed_quad_array); - glDrawElements(GL_TRIANGLES, state.canvas_instance_batches[p_index].instance_count * 6, GL_UNSIGNED_INT, 0); - glBindBuffer(GL_UNIFORM_BUFFER, 0); + glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer); + uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData); + _enable_attributes(range_start, false); + + glDrawElementsInstanced(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0, state.canvas_instance_batches[p_index].instance_count); glBindVertexArray(0); } break; @@ -1248,18 +1246,21 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { ERR_FAIL_COND(!pb); glBindVertexArray(pb->vertex_array); + glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer); + + uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData); + _enable_attributes(range_start, false); if (pb->color_disabled && pb->color != Color(1.0, 1.0, 1.0, 1.0)) { glVertexAttrib4f(RS::ARRAY_COLOR, pb->color.r, pb->color.g, pb->color.b, pb->color.a); } if (pb->index_buffer != 0) { - glDrawElements(prim[polygon->primitive], pb->count, GL_UNSIGNED_INT, nullptr); + glDrawElementsInstanced(prim[polygon->primitive], pb->count, GL_UNSIGNED_INT, nullptr, 1); } else { - glDrawArrays(prim[polygon->primitive], 0, pb->count); + glDrawArraysInstanced(prim[polygon->primitive], 0, pb->count, 1); } glBindVertexArray(0); - glBindBuffer(GL_UNIFORM_BUFFER, 0); if (pb->color_disabled && pb->color != Color(1.0, 1.0, 1.0, 1.0)) { // Reset so this doesn't pollute other draw calls. @@ -1269,14 +1270,16 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { case Item::Command::TYPE_PRIMITIVE: { glBindVertexArray(data.canvas_quad_array); + glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer); + uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData); + _enable_attributes(range_start, true); + const GLenum primitive[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLES }; int instance_count = state.canvas_instance_batches[p_index].instance_count; - if (instance_count > 1) { + ERR_FAIL_COND(instance_count <= 0); + if (instance_count >= 1) { glDrawArraysInstanced(primitive[state.canvas_instance_batches[p_index].primitive_points], 0, state.canvas_instance_batches[p_index].primitive_points, instance_count); - } else { - glDrawArrays(primitive[state.canvas_instance_batches[p_index].primitive_points], 0, state.canvas_instance_batches[p_index].primitive_points); } - glBindBuffer(GL_UNIFORM_BUFFER, 0); } break; @@ -1370,6 +1373,11 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { index_array_gl = mesh_storage->mesh_surface_get_index_buffer(surface, 0); bool use_index_buffer = false; glBindVertexArray(vertex_array_gl); + glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer); + + uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData); + _enable_attributes(range_start, false, instance_count); + if (index_array_gl != 0) { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_array_gl); use_index_buffer = true; @@ -1396,20 +1404,13 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { } GLenum primitive_gl = prim[int(primitive)]; - if (instance_count == 1) { - if (use_index_buffer) { - glDrawElements(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0); - } else { - glDrawArrays(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface)); - } - } else if (instance_count > 1) { - if (use_index_buffer) { - glDrawElementsInstanced(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0, instance_count); - } else { - glDrawArraysInstanced(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), instance_count); - } - } + if (use_index_buffer) { + glDrawElementsInstanced(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0, instance_count); + } else { + glDrawArraysInstanced(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), instance_count); + } + glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); if (instance_count > 1) { glDisableVertexAttribArray(5); @@ -1429,7 +1430,7 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { } void RasterizerCanvasGLES3::_add_to_batch(uint32_t &r_index, bool &r_batch_broken) { - if (r_index >= data.max_instances_per_ubo - 1) { + if (r_index >= data.max_instances_per_buffer - 1) { ERR_PRINT_ONCE("Trying to draw too many items. Please increase maximum number of items in the project settings 'rendering/gl_compatibility/item_buffer_size'"); return; } @@ -1457,27 +1458,25 @@ void RasterizerCanvasGLES3::_new_batch(bool &r_batch_broken, uint32_t &r_index) // Copy the properties of the current batch, we will manually update the things that changed. Batch new_batch = state.canvas_instance_batches[state.current_batch_index]; new_batch.instance_count = 0; - new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count * sizeof(InstanceData); + new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count; state.current_batch_index++; state.canvas_instance_batches.push_back(new_batch); - _align_instance_data_buffer(r_index); } -bool RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization) { - if (p_material_data) { - if (p_material_data->shader_data->version.is_valid() && p_material_data->shader_data->valid) { - // Bind uniform buffer and textures - p_material_data->bind_uniforms(); - return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant, p_specialization); - } else { - return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization); - } - } else { - return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization); +void RasterizerCanvasGLES3::_enable_attributes(uint32_t p_start, bool p_primitive, uint32_t p_rate) { + uint32_t split = p_primitive ? 11 : 12; + for (uint32_t i = 6; i < split; i++) { + glEnableVertexAttribArray(i); + glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float))); + glVertexAttribDivisor(i, p_rate); + } + for (uint32_t i = split; i <= 13; i++) { + glEnableVertexAttribArray(i); + glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float))); + glVertexAttribDivisor(i, p_rate); } } - RID RasterizerCanvasGLES3::light_create() { CanvasLight canvas_light; return canvas_light_owner.make_rid(canvas_light); @@ -2416,8 +2415,8 @@ void RasterizerCanvasGLES3::_allocate_instance_data_buffer() { GLuint new_buffers[3]; glGenBuffers(3, new_buffers); // Batch UBO. - glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]); - glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, new_buffers[0]); + glBufferData(GL_ARRAY_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW); // Light uniform buffer. glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]); glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW); @@ -2427,36 +2426,16 @@ void RasterizerCanvasGLES3::_allocate_instance_data_buffer() { state.current_buffer = (state.current_buffer + 1); DataBuffer db; - db.ubo = new_buffers[0]; + db.buffer = new_buffers[0]; db.light_ubo = new_buffers[1]; db.state_ubo = new_buffers[2]; db.last_frame_used = RSG::rasterizer->get_frame_number(); state.canvas_instance_data_buffers.insert(state.current_buffer, db); state.current_buffer = state.current_buffer % state.canvas_instance_data_buffers.size(); + glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_UNIFORM_BUFFER, 0); } -// Batch start positions need to be aligned to the device's GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT. -// This needs to be called anytime a new batch is created. -void RasterizerCanvasGLES3::_align_instance_data_buffer(uint32_t &r_index) { - if (GLES3::Config::get_singleton()->uniform_buffer_offset_alignment > int(sizeof(InstanceData))) { - uint32_t offset = state.canvas_instance_batches[state.current_batch_index].start % GLES3::Config::get_singleton()->uniform_buffer_offset_alignment; - if (offset > 0) { - // uniform_buffer_offset_alignment can be 4, 16, 32, or 256. Our instance batches are 128 bytes. - // Accordingly, this branch is only triggered if we are 128 bytes off. - uint32_t offset_bytes = GLES3::Config::get_singleton()->uniform_buffer_offset_alignment - offset; - state.canvas_instance_batches[state.current_batch_index].start += offset_bytes; - // Offset the instance array so it stays in sync with batch start points. - // This creates gaps in the instance buffer with wasted space, but we can't help it. - r_index += offset_bytes / sizeof(InstanceData); - if (r_index > 0) { - // In this case we need to copy over the basic data. - state.instance_data_array[r_index] = state.instance_data_array[r_index - 1]; - } - } - } -} - void RasterizerCanvasGLES3::set_time(double p_time) { state.time = p_time; } @@ -2601,12 +2580,12 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() { data.max_instances_per_batch = 128; } else { data.max_lights_per_render = 256; - data.max_instances_per_batch = 512; + data.max_instances_per_batch = 2048; } // Reserve 3 Uniform Buffers for instance data Frame N, N+1 and N+2 - data.max_instances_per_ubo = MAX(data.max_instances_per_batch, uint32_t(GLOBAL_GET("rendering/gl_compatibility/item_buffer_size"))); - data.max_instance_buffer_size = data.max_instances_per_ubo * sizeof(InstanceData); // 16,384 instances * 128 bytes = 2,097,152 bytes = 2,048 kb + data.max_instances_per_buffer = MAX(data.max_instances_per_batch, uint32_t(GLOBAL_GET("rendering/gl_compatibility/item_buffer_size"))); + data.max_instance_buffer_size = data.max_instances_per_buffer * sizeof(InstanceData); // 16,384 instances * 128 bytes = 2,097,152 bytes = 2,048 kb state.canvas_instance_data_buffers.resize(3); state.canvas_instance_batches.reserve(200); @@ -2614,8 +2593,8 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() { GLuint new_buffers[3]; glGenBuffers(3, new_buffers); // Batch UBO. - glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]); - glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, new_buffers[0]); + glBufferData(GL_ARRAY_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW); // Light uniform buffer. glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]); glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW); @@ -2623,41 +2602,28 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() { glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]); glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW); DataBuffer db; - db.ubo = new_buffers[0]; + db.buffer = new_buffers[0]; db.light_ubo = new_buffers[1]; db.state_ubo = new_buffers[2]; db.last_frame_used = 0; db.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); state.canvas_instance_data_buffers[i] = db; } + glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_UNIFORM_BUFFER, 0); - state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_ubo); + state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_buffer); state.light_uniforms = memnew_arr(LightUniform, data.max_lights_per_render); { - const uint32_t no_of_instances = data.max_instances_per_batch; - + const uint32_t indices[6] = { 0, 2, 1, 3, 2, 0 }; glGenVertexArrays(1, &data.indexed_quad_array); glBindVertexArray(data.indexed_quad_array); glBindBuffer(GL_ARRAY_BUFFER, data.canvas_quad_vertices); - - const uint32_t num_indices = 6; - const uint32_t quad_indices[num_indices] = { 0, 2, 1, 3, 2, 0 }; - - const uint32_t total_indices = no_of_instances * num_indices; - uint32_t *indices = new uint32_t[total_indices]; - for (uint32_t i = 0; i < total_indices; i++) { - uint32_t quad = i / num_indices; - uint32_t quad_local = i % num_indices; - indices[i] = quad_indices[quad_local] + quad * num_indices; - } - glGenBuffers(1, &data.indexed_quad_buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.indexed_quad_buffer); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32_t) * total_indices, indices, GL_STATIC_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32_t) * 6, indices, GL_STATIC_DRAW); glBindVertexArray(0); - delete[] indices; } String global_defines; diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index bd87973404..d99a8414f7 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -245,7 +245,7 @@ public: uint32_t max_lights_per_render = 256; uint32_t max_lights_per_item = 16; uint32_t max_instances_per_batch = 512; - uint32_t max_instances_per_ubo = 16384; + uint32_t max_instances_per_buffer = 16384; uint32_t max_instance_buffer_size = 16384 * 128; } data; @@ -278,7 +278,7 @@ public: // We track them and ensure that they don't get reused until at least 2 frames have passed // to avoid the GPU stalling to wait for a resource to become available. struct DataBuffer { - GLuint ubo = 0; + GLuint buffer = 0; GLuint light_ubo = 0; GLuint state_ubo = 0; uint64_t last_frame_used = -3; @@ -359,6 +359,7 @@ public: void _add_to_batch(uint32_t &r_index, bool &r_batch_broken); void _allocate_instance_data_buffer(); void _align_instance_data_buffer(uint32_t &r_index); + void _enable_attributes(uint32_t p_start, bool p_primitive, uint32_t p_rate = 1); void set_time(double p_time); diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index c1c26ed963..9a90f33674 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -11,6 +11,7 @@ mode_instanced = #define USE_ATTRIBUTES \n#define USE_INSTANCING DISABLE_LIGHTING = false USE_RGBA_SHADOWS = false +SINGLE_INSTANCE = false #[vertex] @@ -25,9 +26,74 @@ layout(location = 1) in highp vec4 instance_xform0; layout(location = 2) in highp vec4 instance_xform1; layout(location = 5) in highp uvec4 instance_color_custom_data; // Color packed into xy, custom_data packed into zw for compatibility with 3D +#endif // USE_INSTANCING + +#endif // USE_ATTRIBUTES + +#include "stdlib_inc.glsl" + +layout(location = 6) in highp vec4 attrib_A; +layout(location = 7) in highp vec4 attrib_B; +layout(location = 8) in highp vec4 attrib_C; +layout(location = 9) in highp vec4 attrib_D; +layout(location = 10) in highp vec4 attrib_E; +#ifdef USE_PRIMITIVE +layout(location = 11) in highp uvec4 attrib_F; +#else +layout(location = 11) in highp vec4 attrib_F; +#endif +layout(location = 12) in highp uvec4 attrib_G; +layout(location = 13) in highp uvec4 attrib_H; + +#define read_draw_data_world_x attrib_A.xy +#define read_draw_data_world_y attrib_A.zw +#define read_draw_data_world_ofs attrib_B.xy +#define read_draw_data_color_texture_pixel_size attrib_B.zw + +#ifdef USE_PRIMITIVE + +#define read_draw_data_point_a attrib_C.xy +#define read_draw_data_point_b attrib_C.zw +#define read_draw_data_point_c attrib_D.xy +#define read_draw_data_uv_a attrib_D.zw +#define read_draw_data_uv_b attrib_E.xy +#define read_draw_data_uv_c attrib_E.zw + +#define read_draw_data_color_a_rg attrib_F.x +#define read_draw_data_color_a_ba attrib_F.y +#define read_draw_data_color_b_rg attrib_F.z +#define read_draw_data_color_b_ba attrib_F.w +#define read_draw_data_color_c_rg attrib_G.x +#define read_draw_data_color_c_ba attrib_G.y + +#else + +#define read_draw_data_modulation attrib_C +#define read_draw_data_ninepatch_margins attrib_D +#define read_draw_data_dst_rect attrib_E +#define read_draw_data_src_rect attrib_F + #endif +#define read_draw_data_flags attrib_G.z +#define read_draw_data_specular_shininess attrib_G.w +#define read_draw_data_lights attrib_H + +// Varyings so the per-instance info can be used in the fragment shader +flat out vec4 varying_A; +flat out vec2 varying_B; +#ifndef USE_PRIMITIVE +flat out vec4 varying_C; +#ifndef USE_ATTRIBUTES +#ifdef USE_NINEPATCH + +flat out vec2 varying_D; +#endif +flat out vec4 varying_E; +#endif #endif +flat out uvec2 varying_F; +flat out uvec4 varying_G; // This needs to be outside clang-format so the ubo comment is in the right place #ifdef MATERIAL_UNIFORMS_USED @@ -39,12 +105,10 @@ layout(std140) uniform MaterialUniforms{ //ubo:4 #endif /* clang-format on */ #include "canvas_uniforms_inc.glsl" -#include "stdlib_inc.glsl" out vec2 uv_interp; out vec4 color_interp; out vec2 vertex_interp; -flat out int draw_data_instance; #ifdef USE_NINEPATCH @@ -55,35 +119,46 @@ out vec2 pixel_size_interp; #GLOBALS void main() { + varying_A = vec4(read_draw_data_world_x, read_draw_data_world_y); + varying_B = read_draw_data_color_texture_pixel_size; +#ifndef USE_PRIMITIVE + varying_C = read_draw_data_ninepatch_margins; + +#ifndef USE_ATTRIBUTES +#ifdef USE_NINEPATCH + varying_D = vec2(read_draw_data_dst_rect.z, read_draw_data_dst_rect.w); +#endif // USE_NINEPATCH + varying_E = read_draw_data_src_rect; +#endif // !USE_ATTRIBUTES +#endif // USE_PRIMITIVE + + varying_F = uvec2(read_draw_data_flags, read_draw_data_specular_shininess); + varying_G = read_draw_data_lights; + vec4 instance_custom = vec4(0.0); #ifdef USE_PRIMITIVE - draw_data_instance = gl_InstanceID; vec2 vertex; vec2 uv; vec4 color; if (gl_VertexID % 3 == 0) { - vertex = draw_data[draw_data_instance].point_a; - uv = draw_data[draw_data_instance].uv_a; - color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_a_rg), unpackHalf2x16(draw_data[draw_data_instance].color_a_ba)); + vertex = read_draw_data_point_a; + uv = read_draw_data_uv_a; + color = vec4(unpackHalf2x16(read_draw_data_color_a_rg), unpackHalf2x16(read_draw_data_color_a_ba)); } else if (gl_VertexID % 3 == 1) { - vertex = draw_data[draw_data_instance].point_b; - uv = draw_data[draw_data_instance].uv_b; - color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_b_rg), unpackHalf2x16(draw_data[draw_data_instance].color_b_ba)); + vertex = read_draw_data_point_b; + uv = read_draw_data_uv_b; + color = vec4(unpackHalf2x16(read_draw_data_color_b_rg), unpackHalf2x16(read_draw_data_color_b_ba)); } else { - vertex = draw_data[draw_data_instance].point_c; - uv = draw_data[draw_data_instance].uv_c; - color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_c_rg), unpackHalf2x16(draw_data[draw_data_instance].color_c_ba)); + vertex = read_draw_data_point_c; + uv = read_draw_data_uv_c; + color = vec4(unpackHalf2x16(read_draw_data_color_c_rg), unpackHalf2x16(read_draw_data_color_c_ba)); } #elif defined(USE_ATTRIBUTES) - draw_data_instance = gl_InstanceID; -#ifdef USE_INSTANCING - draw_data_instance = 0; -#endif vec2 vertex = vertex_attrib; - vec4 color = color_attrib * draw_data[draw_data_instance].modulation; + vec4 color = color_attrib * read_draw_data_modulation; vec2 uv = uv_attrib; #ifdef USE_INSTANCING @@ -93,30 +168,29 @@ void main() { #endif #else - draw_data_instance = gl_VertexID / 6; vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0)); vec2 vertex_base = vertex_base_arr[gl_VertexID % 6]; - vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy); - vec4 color = draw_data[draw_data_instance].modulation; - vec2 vertex = draw_data[draw_data_instance].dst_rect.xy + abs(draw_data[draw_data_instance].dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data[draw_data_instance].src_rect.zw, vec2(0.0, 0.0))); + vec2 uv = read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw) * ((read_draw_data_flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy); + vec4 color = read_draw_data_modulation; + vec2 vertex = read_draw_data_dst_rect.xy + abs(read_draw_data_dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(read_draw_data_src_rect.zw, vec2(0.0, 0.0))); #endif - mat4 model_matrix = mat4(vec4(draw_data[draw_data_instance].world_x, 0.0, 0.0), vec4(draw_data[draw_data_instance].world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data[draw_data_instance].world_ofs, 0.0, 1.0)); + mat4 model_matrix = mat4(vec4(read_draw_data_world_x, 0.0, 0.0), vec4(read_draw_data_world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(read_draw_data_world_ofs, 0.0, 1.0)); #ifdef USE_INSTANCING model_matrix = model_matrix * transpose(mat4(instance_xform0, instance_xform1, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))); #endif // USE_INSTANCING #if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) - if (bool(draw_data[draw_data_instance].flags & FLAGS_USING_PARTICLES)) { + if (bool(read_draw_data_flags & FLAGS_USING_PARTICLES)) { //scale by texture size - vertex /= draw_data[draw_data_instance].color_texture_pixel_size; + vertex /= read_draw_data_color_texture_pixel_size; } #endif - vec2 color_texture_pixel_size = draw_data[draw_data_instance].color_texture_pixel_size.xy; + vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size; #ifdef USE_POINT_SIZE float point_size = 1.0; @@ -126,7 +200,7 @@ void main() { } #ifdef USE_NINEPATCH - pixel_size_interp = abs(draw_data[draw_data_instance].dst_rect.zw) * vertex_base; + pixel_size_interp = abs(read_draw_data_dst_rect.zw) * vertex_base; #endif #if !defined(SKIP_TRANSFORM_USED) @@ -159,6 +233,46 @@ void main() { #include "canvas_uniforms_inc.glsl" #include "stdlib_inc.glsl" +in vec2 uv_interp; +in vec2 vertex_interp; +in vec4 color_interp; + +#ifdef USE_NINEPATCH + +in vec2 pixel_size_interp; + +#endif + +// Can all be flat as they are the same for the whole batched instance +flat in vec4 varying_A; +flat in vec2 varying_B; +#define read_draw_data_world_x varying_A.xy +#define read_draw_data_world_y varying_A.zw +#define read_draw_data_color_texture_pixel_size varying_B + +#ifndef USE_PRIMITIVE +flat in vec4 varying_C; +#define read_draw_data_ninepatch_margins varying_C + +#ifndef USE_ATTRIBUTES +#ifdef USE_NINEPATCH + +flat in vec2 varying_D; +#define read_draw_data_dst_rect_z varying_D.x +#define read_draw_data_dst_rect_w varying_D.y +#endif + +flat in vec4 varying_E; +#define read_draw_data_src_rect varying_E +#endif // USE_ATTRIBUTES +#endif // USE_PRIMITIVE + +flat in uvec2 varying_F; +flat in uvec4 varying_G; +#define read_draw_data_flags varying_F.x +#define read_draw_data_specular_shininess varying_F.y +#define read_draw_data_lights varying_G + #ifndef DISABLE_LIGHTING uniform sampler2D atlas_texture; //texunit:-2 uniform sampler2D shadow_atlas_texture; //texunit:-3 @@ -170,17 +284,6 @@ uniform sampler2D specular_texture; //texunit:-7 uniform sampler2D color_texture; //texunit:0 -in vec2 uv_interp; -in vec4 color_interp; -in vec2 vertex_interp; -flat in int draw_data_instance; - -#ifdef USE_NINEPATCH - -in vec2 pixel_size_interp; - -#endif - layout(location = 0) out vec4 frag_color; #ifdef MATERIAL_UNIFORMS_USED @@ -366,7 +469,7 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo } else if (pixel >= draw_size - margin_end) { return (tex_size - (draw_size - pixel)) * tex_pixel_size; } else { - if (!bool(draw_data[draw_data_instance].flags & FLAGS_NINEPACH_DRAW_CENTER)) { + if (!bool(read_draw_data_flags & FLAGS_NINEPACH_DRAW_CENTER)) { draw_center--; } @@ -414,28 +517,26 @@ void main() { int draw_center = 2; uv = vec2( - map_ninepatch_axis(pixel_size_interp.x, abs(draw_data[draw_data_instance].dst_rect.z), draw_data[draw_data_instance].color_texture_pixel_size.x, draw_data[draw_data_instance].ninepatch_margins.x, draw_data[draw_data_instance].ninepatch_margins.z, int(draw_data[draw_data_instance].flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center), - map_ninepatch_axis(pixel_size_interp.y, abs(draw_data[draw_data_instance].dst_rect.w), draw_data[draw_data_instance].color_texture_pixel_size.y, draw_data[draw_data_instance].ninepatch_margins.y, draw_data[draw_data_instance].ninepatch_margins.w, int(draw_data[draw_data_instance].flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center)); + map_ninepatch_axis(pixel_size_interp.x, abs(read_draw_data_dst_rect_z), read_draw_data_color_texture_pixel_size.x, read_draw_data_ninepatch_margins.x, read_draw_data_ninepatch_margins.z, int(read_draw_data_flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center), + map_ninepatch_axis(pixel_size_interp.y, abs(read_draw_data_dst_rect_w), read_draw_data_color_texture_pixel_size.y, read_draw_data_ninepatch_margins.y, read_draw_data_ninepatch_margins.w, int(read_draw_data_flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center)); if (draw_center == 0) { color.a = 0.0; } - uv = uv * draw_data[draw_data_instance].src_rect.zw + draw_data[draw_data_instance].src_rect.xy; //apply region if needed + uv = uv * read_draw_data_src_rect.zw + read_draw_data_src_rect.xy; //apply region if needed #endif - if (bool(draw_data[draw_data_instance].flags & FLAGS_CLIP_RECT_UV)) { - uv = clamp(uv, draw_data[draw_data_instance].src_rect.xy, draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw)); + if (bool(read_draw_data_flags & FLAGS_CLIP_RECT_UV)) { + uv = clamp(uv, read_draw_data_src_rect.xy, read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw)); } #endif #ifndef USE_PRIMITIVE - if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_MSDF)) { - float px_range = draw_data[draw_data_instance].ninepatch_margins.x; - float outline_thickness = draw_data[draw_data_instance].ninepatch_margins.y; - //float reserved1 = draw_data[draw_data_instance].ninepatch_margins.z; - //float reserved2 = draw_data[draw_data_instance].ninepatch_margins.w; + if (bool(read_draw_data_flags & FLAGS_USE_MSDF)) { + float px_range = read_draw_data_ninepatch_margins.x; + float outline_thickness = read_draw_data_ninepatch_margins.y; vec4 msdf_sample = texture(color_texture, uv); vec2 msdf_size = vec2(textureSize(color_texture, 0)); @@ -451,7 +552,7 @@ void main() { float a = clamp(d * px_size + 0.5, 0.0, 1.0); color.a = a * color.a; } - } else if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_LCD)) { + } else if (bool(read_draw_data_flags & FLAGS_USE_LCD)) { vec4 lcd_sample = texture(color_texture, uv); if (lcd_sample.a == 1.0) { color.rgb = lcd_sample.rgb * color.a; @@ -465,7 +566,7 @@ void main() { color *= texture(color_texture, uv); } - uint light_count = (draw_data[draw_data_instance].flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights + uint light_count = (read_draw_data_flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights bool using_light = light_count > 0u || directional_light_count > 0u; vec3 normal; @@ -476,7 +577,7 @@ void main() { bool normal_used = false; #endif - if (normal_used || (using_light && bool(draw_data[draw_data_instance].flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) { + if (normal_used || (using_light && bool(read_draw_data_flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) { normal.xy = texture(normal_texture, uv).xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); normal.z = sqrt(1.0 - dot(normal.xy, normal.xy)); normal_used = true; @@ -493,9 +594,9 @@ void main() { bool specular_shininess_used = false; #endif - if (specular_shininess_used || (using_light && normal_used && bool(draw_data[draw_data_instance].flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) { + if (specular_shininess_used || (using_light && normal_used && bool(read_draw_data_flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) { specular_shininess = texture(specular_texture, uv); - specular_shininess *= godot_unpackUnorm4x8(draw_data[draw_data_instance].specular_shininess); + specular_shininess *= godot_unpackUnorm4x8(read_draw_data_specular_shininess); specular_shininess_used = true; } else { specular_shininess = vec4(1.0); @@ -507,7 +608,7 @@ void main() { vec2 screen_uv = vec2(0.0); #endif - vec2 color_texture_pixel_size = draw_data[draw_data_instance].color_texture_pixel_size.xy; + vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size.xy; vec3 light_vertex = vec3(vertex, 0.0); vec2 shadow_vertex = vertex; @@ -529,7 +630,7 @@ void main() { if (normal_used) { //convert by item transform - normal.xy = mat2(normalize(draw_data[draw_data_instance].world_x), normalize(draw_data[draw_data_instance].world_y)) * normal.xy; + normal.xy = mat2(normalize(read_draw_data_world_x), normalize(read_draw_data_world_y)) * normal.xy; //convert by canvas transform normal = normalize((canvas_normal_transform * vec4(normal, 0.0)).xyz); } @@ -591,15 +692,15 @@ void main() { uint light_base; if (i < 8u) { if (i < 4u) { - light_base = draw_data[draw_data_instance].lights[0]; + light_base = read_draw_data_lights[0]; } else { - light_base = draw_data[draw_data_instance].lights[1]; + light_base = read_draw_data_lights[1]; } } else { if (i < 12u) { - light_base = draw_data[draw_data_instance].lights[2]; + light_base = read_draw_data_lights[2]; } else { - light_base = draw_data[draw_data_instance].lights[3]; + light_base = read_draw_data_lights[3]; } } light_base >>= (i & 3u) * 8u; diff --git a/drivers/gles3/shaders/canvas_uniforms_inc.glsl b/drivers/gles3/shaders/canvas_uniforms_inc.glsl index dd5ebecb1a..f65558f042 100644 --- a/drivers/gles3/shaders/canvas_uniforms_inc.glsl +++ b/drivers/gles3/shaders/canvas_uniforms_inc.glsl @@ -27,38 +27,6 @@ #define FLAGS_USE_MSDF uint(1 << 28) #define FLAGS_USE_LCD uint(1 << 29) -// must be always 128 bytes long -struct DrawData { - vec2 world_x; - vec2 world_y; - vec2 world_ofs; - vec2 color_texture_pixel_size; -#ifdef USE_PRIMITIVE - vec2 point_a; - vec2 point_b; - vec2 point_c; - vec2 uv_a; - vec2 uv_b; - vec2 uv_c; - uint color_a_rg; - uint color_a_ba; - uint color_b_rg; - uint color_b_ba; - uint color_c_rg; - uint color_c_ba; -#else - vec4 modulation; - vec4 ninepatch_margins; - vec4 dst_rect; //for built-in rect and UV - vec4 src_rect; - uint pad; - uint pad2; -#endif - uint flags; - uint specular_shininess; - uvec4 lights; -}; - layout(std140) uniform GlobalShaderUniformData { //ubo:1 vec4 global_shader_uniforms[MAX_GLOBAL_SHADER_UNIFORMS]; }; @@ -116,7 +84,3 @@ layout(std140) uniform LightData { //ubo:2 Light light_array[MAX_LIGHTS]; }; #endif // DISABLE_LIGHTING -layout(std140) uniform DrawDataInstances { //ubo:3 - - DrawData draw_data[MAX_DRAW_DATA_INSTANCES]; -}; diff --git a/drivers/gles3/storage/config.cpp b/drivers/gles3/storage/config.cpp index 9b496c0999..46dc2b806e 100644 --- a/drivers/gles3/storage/config.cpp +++ b/drivers/gles3/storage/config.cpp @@ -90,8 +90,6 @@ Config::Config() { glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size); glGetIntegerv(GL_MAX_VIEWPORT_DIMS, max_viewport_size); - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_offset_alignment); - support_anisotropic_filter = extensions.has("GL_EXT_texture_filter_anisotropic"); if (support_anisotropic_filter) { glGetFloatv(_GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &anisotropic_level); diff --git a/drivers/gles3/storage/config.h b/drivers/gles3/storage/config.h index 87202fde84..1a93b4b8ef 100644 --- a/drivers/gles3/storage/config.h +++ b/drivers/gles3/storage/config.h @@ -67,8 +67,6 @@ public: int max_renderable_lights = 0; int max_lights_per_object = 0; - int uniform_buffer_offset_alignment = 0; - // TODO implement wireframe in OpenGL // bool generate_wireframes; |