diff options
Diffstat (limited to 'drivers')
53 files changed, 7534 insertions, 1429 deletions
diff --git a/drivers/SCsub b/drivers/SCsub index dd81fc645c..6cfcb1d18c 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -24,6 +24,7 @@ SConscript("winmidi/SCsub") # Graphics drivers if env["vulkan"]: + SConscript("spirv-reflect/SCsub") SConscript("vulkan/SCsub") if env["opengl3"]: SConscript("gl_context/SCsub") @@ -31,7 +32,6 @@ if env["opengl3"]: # Core dependencies SConscript("png/SCsub") -SConscript("spirv-reflect/SCsub") env.add_source_files(env.drivers_sources, "*.cpp") diff --git a/drivers/alsamidi/midi_driver_alsamidi.cpp b/drivers/alsamidi/midi_driver_alsamidi.cpp index d2a0076023..61fecccb6b 100644 --- a/drivers/alsamidi/midi_driver_alsamidi.cpp +++ b/drivers/alsamidi/midi_driver_alsamidi.cpp @@ -37,85 +37,135 @@ #include <errno.h> -static int get_message_size(uint8_t message) { - switch (message & 0xF0) { - case 0x80: // note off - case 0x90: // note on - case 0xA0: // aftertouch - case 0xB0: // continuous controller - case 0xE0: // pitch bend - case 0xF2: // song position pointer - return 3; - - case 0xC0: // patch change - case 0xD0: // channel pressure - case 0xF1: // time code quarter frame - case 0xF3: // song select +MIDIDriverALSAMidi::MessageCategory MIDIDriverALSAMidi::msg_category(uint8_t msg_part) { + if (msg_part >= 0xf8) { + return MessageCategory::RealTime; + } else if (msg_part >= 0xf0) { + // System Exclusive begin/end are specified as System Common Category messages, + // but we separate them here and give them their own categories as their + // behavior is significantly different. + if (msg_part == 0xf0) { + return MessageCategory::SysExBegin; + } else if (msg_part == 0xf7) { + return MessageCategory::SysExEnd; + } + return MessageCategory::SystemCommon; + } else if (msg_part >= 0x80) { + return MessageCategory::Voice; + } + return MessageCategory::Data; +} + +size_t MIDIDriverALSAMidi::msg_expected_data(uint8_t status_byte) { + if (msg_category(status_byte) == MessageCategory::Voice) { + // Voice messages have a channel number in the status byte, mask it out. + status_byte &= 0xf0; + } + + switch (status_byte) { + case 0x80: // Note Off + case 0x90: // Note On + case 0xA0: // Polyphonic Key Pressure (Aftertouch) + case 0xB0: // Control Change (CC) + case 0xE0: // Pitch Bend Change + case 0xF2: // Song Position Pointer return 2; - case 0xF0: // SysEx start - case 0xF4: // reserved - case 0xF5: // reserved - case 0xF6: // tune request - case 0xF7: // SysEx end - case 0xF8: // timing clock - case 0xF9: // reserved - case 0xFA: // start - case 0xFB: // continue - case 0xFC: // stop - case 0xFD: // reserved - case 0xFE: // active sensing - case 0xFF: // reset + case 0xC0: // Program Change + case 0xD0: // Channel Pressure (Aftertouch) + case 0xF1: // MIDI Time Code Quarter Frame + case 0xF3: // Song Select return 1; } - return 256; + return 0; +} + +void MIDIDriverALSAMidi::InputConnection::parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, + uint64_t timestamp) { + switch (msg_category(byte)) { + case MessageCategory::RealTime: + // Real-Time messages are single byte messages that can + // occur at any point. + // We pass them straight through. + driver.receive_input_packet(timestamp, &byte, 1); + break; + + case MessageCategory::Data: + // We don't currently forward System Exclusive messages so skip their data. + // Collect any expected data for other message types. + if (!skipping_sys_ex && expected_data > received_data) { + buffer[received_data + 1] = byte; + received_data++; + + // Forward a complete message and reset relevant state. + if (received_data == expected_data) { + driver.receive_input_packet(timestamp, buffer, received_data + 1); + received_data = 0; + + if (msg_category(buffer[0]) != MessageCategory::Voice) { + // Voice Category messages can be sent with "running status". + // This means they don't resend the status byte until it changes. + // For other categories, we reset expected data, to require a new status byte. + expected_data = 0; + } + } + } + break; + + case MessageCategory::SysExBegin: + buffer[0] = byte; + skipping_sys_ex = true; + break; + + case MessageCategory::SysExEnd: + expected_data = 0; + skipping_sys_ex = false; + break; + + case MessageCategory::Voice: + case MessageCategory::SystemCommon: + buffer[0] = byte; + received_data = 0; + expected_data = msg_expected_data(byte); + skipping_sys_ex = false; + if (expected_data == 0) { + driver.receive_input_packet(timestamp, &byte, 1); + } + break; + } +} + +int MIDIDriverALSAMidi::InputConnection::read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp) { + int ret; + do { + uint8_t byte = 0; + ret = snd_rawmidi_read(rawmidi_ptr, &byte, 1); + + if (ret < 0) { + if (ret != -EAGAIN) { + ERR_PRINT("snd_rawmidi_read error: " + String(snd_strerror(ret))); + } + } else { + parse_byte(byte, driver, timestamp); + } + } while (ret > 0); + + return ret; } void MIDIDriverALSAMidi::thread_func(void *p_udata) { MIDIDriverALSAMidi *md = static_cast<MIDIDriverALSAMidi *>(p_udata); uint64_t timestamp = 0; - uint8_t buffer[256]; - int expected_size = 255; - int bytes = 0; while (!md->exit_thread.is_set()) { - int ret; - md->lock(); - for (int i = 0; i < md->connected_inputs.size(); i++) { - snd_rawmidi_t *midi_in = md->connected_inputs[i]; - do { - uint8_t byte = 0; - ret = snd_rawmidi_read(midi_in, &byte, 1); - if (ret < 0) { - if (ret != -EAGAIN) { - ERR_PRINT("snd_rawmidi_read error: " + String(snd_strerror(ret))); - } - } else { - if (byte & 0x80) { - // Flush previous packet if there is any - if (bytes) { - md->receive_input_packet(timestamp, buffer, bytes); - bytes = 0; - } - expected_size = get_message_size(byte); - // After a SysEx start, all bytes are data until a SysEx end, so - // we're going to end the command at the SES, and let the common - // driver ignore the following data bytes. - } + InputConnection *connections = md->connected_inputs.ptrw(); + size_t connection_count = md->connected_inputs.size(); - if (bytes < 256) { - buffer[bytes++] = byte; - // If we know the size of the current packet receive it if it reached the expected size - if (bytes >= expected_size) { - md->receive_input_packet(timestamp, buffer, bytes); - bytes = 0; - } - } - } - } while (ret > 0); + for (size_t i = 0; i < connection_count; i++) { + connections[i].read_in(*md, timestamp); } md->unlock(); @@ -139,7 +189,7 @@ Error MIDIDriverALSAMidi::open() { snd_rawmidi_t *midi_in; int ret = snd_rawmidi_open(&midi_in, nullptr, name, SND_RAWMIDI_NONBLOCK); if (ret >= 0) { - connected_inputs.insert(i++, midi_in); + connected_inputs.insert(i++, InputConnection(midi_in)); } } @@ -160,7 +210,7 @@ void MIDIDriverALSAMidi::close() { thread.wait_to_finish(); for (int i = 0; i < connected_inputs.size(); i++) { - snd_rawmidi_t *midi_in = connected_inputs[i]; + snd_rawmidi_t *midi_in = connected_inputs[i].rawmidi_ptr; snd_rawmidi_close(midi_in); } connected_inputs.clear(); @@ -179,7 +229,7 @@ PackedStringArray MIDIDriverALSAMidi::get_connected_inputs() { lock(); for (int i = 0; i < connected_inputs.size(); i++) { - snd_rawmidi_t *midi_in = connected_inputs[i]; + snd_rawmidi_t *midi_in = connected_inputs[i].rawmidi_ptr; snd_rawmidi_info_t *info; snd_rawmidi_info_malloc(&info); diff --git a/drivers/alsamidi/midi_driver_alsamidi.h b/drivers/alsamidi/midi_driver_alsamidi.h index ac3530b1b2..9265dede3d 100644 --- a/drivers/alsamidi/midi_driver_alsamidi.h +++ b/drivers/alsamidi/midi_driver_alsamidi.h @@ -46,12 +46,48 @@ class MIDIDriverALSAMidi : public MIDIDriver { Thread thread; Mutex mutex; - Vector<snd_rawmidi_t *> connected_inputs; + class InputConnection { + public: + InputConnection() = default; + InputConnection(snd_rawmidi_t *midi_in) : + rawmidi_ptr{ midi_in } {} + + // Read in and parse available data, forwarding any complete messages through the driver. + int read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp); + + snd_rawmidi_t *rawmidi_ptr = nullptr; + + private: + static const size_t MSG_BUFFER_SIZE = 3; + uint8_t buffer[MSG_BUFFER_SIZE] = { 0 }; + size_t expected_data = 0; + size_t received_data = 0; + bool skipping_sys_ex = false; + void parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, uint64_t timestamp); + }; + + Vector<InputConnection> connected_inputs; SafeFlag exit_thread; static void thread_func(void *p_udata); + enum class MessageCategory { + Data, + Voice, + SysExBegin, + SystemCommon, // excluding System Exclusive Begin/End + SysExEnd, + RealTime, + }; + + // If the passed byte is a status byte, return the associated message category, + // else return MessageCategory::Data. + static MessageCategory msg_category(uint8_t msg_part); + + // Return the number of data bytes expected for the provided status byte. + static size_t msg_expected_data(uint8_t status_byte); + void lock() const; void unlock() const; diff --git a/drivers/gl_context/SCsub b/drivers/gl_context/SCsub index 7e8bd22960..2204c486c6 100644 --- a/drivers/gl_context/SCsub +++ b/drivers/gl_context/SCsub @@ -6,7 +6,7 @@ if env["platform"] in ["haiku", "macos", "windows", "linuxbsd"]: # Thirdparty source files thirdparty_dir = "#thirdparty/glad/" thirdparty_sources = [ - "glad.c", + "gl.c", ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] diff --git a/drivers/gles3/effects/copy_effects.cpp b/drivers/gles3/effects/copy_effects.cpp index de0181f887..b552b52cd5 100644 --- a/drivers/gles3/effects/copy_effects.cpp +++ b/drivers/gles3/effects/copy_effects.cpp @@ -114,19 +114,23 @@ CopyEffects::~CopyEffects() { copy.shader.version_free(copy.shader_version); } -void CopyEffects::copy_to_rect(const Rect2i &p_rect) { - copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION); +void CopyEffects::copy_to_rect(const Rect2 &p_rect) { + bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION); + if (!success) { + return; + } + copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION); - glBindVertexArray(quad_array); - glDrawArrays(GL_TRIANGLES, 0, 6); - glBindVertexArray(0); + draw_screen_quad(); } void CopyEffects::copy_screen() { - copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_DEFAULT); - glBindVertexArray(screen_triangle_array); - glDrawArrays(GL_TRIANGLES, 0, 3); - glBindVertexArray(0); + bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_DEFAULT); + if (!success) { + return; + } + + draw_screen_triangle(); } void CopyEffects::bilinear_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region) { @@ -155,11 +159,26 @@ void CopyEffects::bilinear_blur(GLuint p_source_texture, int p_mipmap_count, con } void CopyEffects::set_color(const Color &p_color, const Rect2i &p_region) { - copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_SIMPLE_COLOR); + bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_SIMPLE_COLOR); + if (!success) { + return; + } + copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_region.position.x, p_region.position.y, p_region.size.x, p_region.size.y, copy.shader_version, CopyShaderGLES3::MODE_SIMPLE_COLOR); copy.shader.version_set_uniform(CopyShaderGLES3::COLOR_IN, p_color, copy.shader_version, CopyShaderGLES3::MODE_SIMPLE_COLOR); + draw_screen_quad(); +} + +void CopyEffects::draw_screen_triangle() { + glBindVertexArray(screen_triangle_array); + glDrawArrays(GL_TRIANGLES, 0, 3); + glBindVertexArray(0); +} + +void CopyEffects::draw_screen_quad() { glBindVertexArray(quad_array); glDrawArrays(GL_TRIANGLES, 0, 6); glBindVertexArray(0); } + #endif // GLES3_ENABLED diff --git a/drivers/gles3/effects/copy_effects.h b/drivers/gles3/effects/copy_effects.h index b863e76579..a817c3f0dc 100644 --- a/drivers/gles3/effects/copy_effects.h +++ b/drivers/gles3/effects/copy_effects.h @@ -61,10 +61,12 @@ public: ~CopyEffects(); // These functions assume that a framebuffer and texture are bound already. They only manage the shader, uniforms, and vertex array. - void copy_to_rect(const Rect2i &p_rect); + void copy_to_rect(const Rect2 &p_rect); void copy_screen(); void bilinear_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region); void set_color(const Color &p_color, const Rect2i &p_region); + void draw_screen_triangle(); + void draw_screen_quad(); }; } //namespace GLES3 diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index c635e18132..e5d4077393 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -36,26 +36,14 @@ #include "rasterizer_scene_gles3.h" #include "core/config/project_settings.h" +#include "core/math/geometry_2d.h" #include "servers/rendering/rendering_server_default.h" #include "storage/config.h" #include "storage/material_storage.h" #include "storage/mesh_storage.h" +#include "storage/particles_storage.h" #include "storage/texture_storage.h" -#ifndef GLES_OVER_GL -#define glClearDepth glClearDepthf -#endif - -//static const GLenum gl_primitive[] = { -// GL_POINTS, -// GL_LINES, -// GL_LINE_STRIP, -// GL_LINE_LOOP, -// GL_TRIANGLES, -// GL_TRIANGLE_STRIP, -// GL_TRIANGLE_FAN -//}; - void RasterizerCanvasGLES3::_update_transform_2d_to_mat4(const Transform2D &p_transform, float *p_mat4) { p_mat4[0] = p_transform.columns[0][0]; p_mat4[1] = p_transform.columns[0][1]; @@ -115,9 +103,10 @@ void RasterizerCanvasGLES3::_update_transform_to_mat4(const Transform3D &p_trans p_mat4[15] = 1; } -void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) { +void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_light_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) { GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); + GLES3::MeshStorage *mesh_storage = GLES3::MeshStorage::get_singleton(); Transform2D canvas_transform_inverse = p_canvas_transform.affine_inverse(); @@ -130,7 +119,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ if (syncStatus == GL_UNSIGNALED) { // If older than 2 frames, wait for sync OpenGL can have up to 3 frames in flight, any more and we need to sync anyway. if (state.canvas_instance_data_buffers[state.current_buffer].last_frame_used < RSG::rasterizer->get_frame_number() - 2) { +#ifndef WEB_ENABLED + // On web, we do nothing as the glSubBufferData will force a sync anyway and WebGL does not like waiting. glClientWaitSync(state.canvas_instance_data_buffers[state.current_buffer].fence, 0, 100000000); // wait for up to 100ms +#endif } else { // Used in last frame or frame before that. OpenGL can get up to two frames behind, so these buffers may still be in use // Allocate a new buffer and use that. @@ -144,9 +136,175 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ } } - // TODO: Setup Directional Lights + //setup directional lights if exist + + uint32_t light_count = 0; + uint32_t directional_light_count = 0; + { + Light *l = p_directional_light_list; + uint32_t index = 0; + + while (l) { + if (index == data.max_lights_per_render) { + l->render_index_cache = -1; + l = l->next_ptr; + continue; + } + + CanvasLight *clight = canvas_light_owner.get_or_null(l->light_internal); + if (!clight) { //unused or invalid texture + l->render_index_cache = -1; + l = l->next_ptr; + ERR_CONTINUE(!clight); + } + + Vector2 canvas_light_dir = l->xform_cache.columns[1].normalized(); + + state.light_uniforms[index].position[0] = -canvas_light_dir.x; + state.light_uniforms[index].position[1] = -canvas_light_dir.y; + + _update_transform_2d_to_mat2x4(clight->shadow.directional_xform, state.light_uniforms[index].shadow_matrix); - // TODO: Setup lights + state.light_uniforms[index].height = l->height; //0..1 here + + for (int i = 0; i < 4; i++) { + state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255)); + state.light_uniforms[index].color[i] = l->color[i]; + } + + state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate + + if (state.shadow_fb != 0) { + state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth); + state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far; + state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset; + } else { + state.light_uniforms[index].shadow_pixel_size = 1.0; + state.light_uniforms[index].shadow_z_far_inv = 1.0; + state.light_uniforms[index].shadow_y_ofs = 0; + } + + state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT; + state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT; + + if (clight->shadow.enabled) { + state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW; + } + + l->render_index_cache = index; + + index++; + l = l->next_ptr; + } + + light_count = index; + directional_light_count = light_count; + state.using_directional_lights = directional_light_count > 0; + } + + //setup lights if exist + + { + Light *l = p_light_list; + uint32_t index = light_count; + + while (l) { + if (index == data.max_lights_per_render) { + l->render_index_cache = -1; + l = l->next_ptr; + continue; + } + + CanvasLight *clight = canvas_light_owner.get_or_null(l->light_internal); + if (!clight) { //unused or invalid texture + l->render_index_cache = -1; + l = l->next_ptr; + ERR_CONTINUE(!clight); + } + Transform2D to_light_xform = (p_canvas_transform * l->light_shader_xform).affine_inverse(); + + Vector2 canvas_light_pos = p_canvas_transform.xform(l->xform.get_origin()); //convert light position to canvas coordinates, as all computation is done in canvas coords to avoid precision loss + state.light_uniforms[index].position[0] = canvas_light_pos.x; + state.light_uniforms[index].position[1] = canvas_light_pos.y; + + _update_transform_2d_to_mat2x4(to_light_xform, state.light_uniforms[index].matrix); + _update_transform_2d_to_mat2x4(l->xform_cache.affine_inverse(), state.light_uniforms[index].shadow_matrix); + + state.light_uniforms[index].height = l->height * (p_canvas_transform.columns[0].length() + p_canvas_transform.columns[1].length()) * 0.5; //approximate height conversion to the canvas size, since all calculations are done in canvas coords to avoid precision loss + for (int i = 0; i < 4; i++) { + state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255)); + state.light_uniforms[index].color[i] = l->color[i]; + } + + state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate + + if (state.shadow_fb != 0) { + state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth); + state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far; + state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset; + } else { + state.light_uniforms[index].shadow_pixel_size = 1.0; + state.light_uniforms[index].shadow_z_far_inv = 1.0; + state.light_uniforms[index].shadow_y_ofs = 0; + } + + state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT; + state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT; + + if (clight->shadow.enabled) { + state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW; + } + + if (clight->texture.is_valid()) { + Rect2 atlas_rect = GLES3::TextureStorage::get_singleton()->texture_atlas_get_texture_rect(clight->texture); + state.light_uniforms[index].atlas_rect[0] = atlas_rect.position.x; + state.light_uniforms[index].atlas_rect[1] = atlas_rect.position.y; + state.light_uniforms[index].atlas_rect[2] = atlas_rect.size.width; + state.light_uniforms[index].atlas_rect[3] = atlas_rect.size.height; + + } else { + state.light_uniforms[index].atlas_rect[0] = 0; + state.light_uniforms[index].atlas_rect[1] = 0; + state.light_uniforms[index].atlas_rect[2] = 0; + state.light_uniforms[index].atlas_rect[3] = 0; + } + + l->render_index_cache = index; + + index++; + l = l->next_ptr; + } + + light_count = index; + } + + if (light_count > 0) { + glBindBufferBase(GL_UNIFORM_BUFFER, LIGHT_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].light_ubo); + +#ifdef WEB_ENABLED + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(LightUniform) * light_count, state.light_uniforms); +#else + // On Desktop and mobile we map the memory without synchronizing for maximum speed. + void *ubo = glMapBufferRange(GL_UNIFORM_BUFFER, 0, sizeof(LightUniform) * light_count, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + memcpy(ubo, state.light_uniforms, sizeof(LightUniform) * light_count); + glUnmapBuffer(GL_UNIFORM_BUFFER); +#endif + + GLuint texture_atlas = texture_storage->texture_atlas_get_texture(); + if (texture_atlas == 0) { + GLES3::Texture *tex = texture_storage->get_texture(texture_storage->texture_gl_get_default(GLES3::DEFAULT_GL_TEXTURE_WHITE)); + texture_atlas = tex->tex_id; + } + glActiveTexture(GL_TEXTURE0 + GLES3::Config::get_singleton()->max_texture_image_units - 2); + glBindTexture(GL_TEXTURE_2D, texture_atlas); + GLuint shadow_tex = state.shadow_texture; + if (shadow_tex == 0) { + GLES3::Texture *tex = texture_storage->get_texture(texture_storage->texture_gl_get_default(GLES3::DEFAULT_GL_TEXTURE_WHITE)); + shadow_tex = tex->tex_id; + } + glActiveTexture(GL_TEXTURE0 + GLES3::Config::get_singleton()->max_texture_image_units - 3); + glBindTexture(GL_TEXTURE_2D, shadow_tex); + } { //update canvas state uniform buffer @@ -154,9 +312,14 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ Size2i ssize = texture_storage->render_target_get_size(p_to_render_target); + // If we've overridden the render target's color texture, then we need + // to invert the Y axis, so 2D texture appear right side up. + // We're probably rendering directly to an XR device. + float y_scale = texture_storage->render_target_get_override_color(p_to_render_target).is_valid() ? -2.0f : 2.0f; + Transform3D screen_transform; screen_transform.translate_local(-(ssize.width / 2.0f), -(ssize.height / 2.0f), 0.0f); - screen_transform.scale(Vector3(2.0f / ssize.width, 2.0f / ssize.height, 1.0f)); + screen_transform.scale(Vector3(2.0f / ssize.width, y_scale / ssize.height, 1.0f)); _update_transform_to_mat4(screen_transform, state_buffer.screen_transform); _update_transform_2d_to_mat4(p_canvas_transform, state_buffer.canvas_transform); @@ -175,13 +338,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ state_buffer.screen_pixel_size[0] = 1.0 / render_target_size.x; state_buffer.screen_pixel_size[1] = 1.0 / render_target_size.y; - // TODO: temporary, this should be set at the top of this function - glViewport(0, 0, render_target_size.x, render_target_size.y); - state_buffer.time = state.time; state_buffer.use_pixel_snap = p_snap_2d_vertices_to_pixel; - state_buffer.directional_light_count = 0; //directional_light_count; + state_buffer.directional_light_count = directional_light_count; Vector2 canvas_scale = p_canvas_transform.get_scale(); @@ -200,7 +360,8 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ state_buffer.sdf_to_tex[3] = -sdf_tex_rect.position.y / sdf_tex_rect.size.height; state_buffer.tex_to_sdf = 1.0 / ((canvas_scale.x + canvas_scale.y) * 0.5); - glBindBufferBase(GL_UNIFORM_BUFFER, BASE_UNIFORM_LOCATION, state.canvas_state_buffer); + + glBindBufferBase(GL_UNIFORM_BUFFER, BASE_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].state_ubo); glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), &state_buffer, GL_STREAM_DRAW); GLuint global_buffer = material_storage->global_shader_parameters_get_uniform_buffer(); @@ -209,11 +370,17 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ glBindBuffer(GL_UNIFORM_BUFFER, 0); } + glActiveTexture(GL_TEXTURE0 + GLES3::Config::get_singleton()->max_texture_image_units - 5); + glBindTexture(GL_TEXTURE_2D, texture_storage->render_target_get_sdf_texture(p_to_render_target)); + { state.default_filter = p_default_filter; state.default_repeat = p_default_repeat; } + Size2 render_target_size = texture_storage->render_target_get_size(p_to_render_target); + glViewport(0, 0, render_target_size.x, render_target_size.y); + r_sdf_used = false; int item_count = 0; bool backbuffer_cleared = false; @@ -223,6 +390,7 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ Rect2 back_buffer_rect; bool backbuffer_copy = false; bool backbuffer_gen_mipmaps = false; + bool update_skeletons = false; Item *ci = p_item_list; Item *canvas_group_owner = nullptr; @@ -264,16 +432,37 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ } } + if (ci->skeleton.is_valid()) { + const Item::Command *c = ci->commands; + + while (c) { + if (c->type == Item::Command::TYPE_MESH) { + const Item::CommandMesh *cm = static_cast<const Item::CommandMesh *>(c); + if (cm->mesh_instance.is_valid()) { + mesh_storage->mesh_instance_check_for_update(cm->mesh_instance); + update_skeletons = true; + } + } + c = c->next; + } + } + if (ci->canvas_group_owner != nullptr) { if (canvas_group_owner == nullptr) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } // Canvas group begins here, render until before this item _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, false); item_count = 0; - Rect2i group_rect = ci->canvas_group_owner->global_rect_cache; - - if (ci->canvas_group_owner->canvas_group->mode == RS::CANVAS_GROUP_MODE_OPAQUE) { + if (ci->canvas_group_owner->canvas_group->mode != RS::CANVAS_GROUP_MODE_TRANSPARENT) { + Rect2i group_rect = ci->canvas_group_owner->global_rect_cache; texture_storage->render_target_copy_to_back_buffer(p_to_render_target, group_rect, false); + if (ci->canvas_group_owner->canvas_group->mode == RS::CANVAS_GROUP_MODE_CLIP_AND_DRAW) { + items[item_count++] = ci->canvas_group_owner; + } } else if (!backbuffer_cleared) { texture_storage->render_target_clear_back_buffer(p_to_render_target, Rect2i(), Color(0, 0, 0, 0)); backbuffer_cleared = true; @@ -292,6 +481,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ } if (ci == canvas_group_owner) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, true); item_count = 0; @@ -305,6 +498,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ } if (backbuffer_copy) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } //render anything pending, including clearing if no items _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, false); @@ -329,6 +526,10 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_ items[item_count++] = ci; if (!ci->next || item_count == MAX_RENDER_ITEMS - 1) { + if (update_skeletons) { + mesh_storage->update_mesh_instances(); + update_skeletons = false; + } _render_items(p_to_render_target, item_count, canvas_transform_inverse, p_light_list, starting_index, false); //then reset item_count = 0; @@ -381,9 +582,18 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou } RID material = ci->material_owner == nullptr ? ci->material : ci->material_owner->material; - - if (material.is_null() && ci->canvas_group != nullptr) { - material = default_canvas_group_material; + if (ci->canvas_group != nullptr) { + if (ci->canvas_group->mode == RS::CANVAS_GROUP_MODE_CLIP_AND_DRAW) { + if (!p_to_backbuffer) { + material = default_clip_children_material; + } + } else { + if (ci->canvas_group->mode == RS::CANVAS_GROUP_MODE_CLIP_ONLY) { + material = default_clip_children_material; + } else { + material = default_canvas_group_material; + } + } } GLES3::CanvasShaderData *shader_data_cache = nullptr; @@ -407,7 +617,14 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou GLES3::CanvasShaderData::BlendMode blend_mode = shader_data_cache ? shader_data_cache->blend_mode : GLES3::CanvasShaderData::BLEND_MODE_MIX; - _record_item_commands(ci, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken); + _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken); + } + + if (index == 0) { + // Nothing to render, just return. + state.current_batch_index = 0; + state.canvas_instance_batches.clear(); + return; } // Copy over all data needed for rendering. @@ -442,7 +659,13 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou GLES3::CanvasMaterialData *material_data = state.canvas_instance_batches[i].material_data; CanvasShaderGLES3::ShaderVariant variant = state.canvas_instance_batches[i].shader_variant; - _bind_material(material_data, variant); + uint64_t specialization = 0; + specialization |= uint64_t(state.canvas_instance_batches[i].lights_disabled); + specialization |= uint64_t(!GLES3::Config::get_singleton()->float_texture_supported) << 1; + bool success = _bind_material(material_data, variant, specialization); + if (!success) { + continue; + } GLES3::CanvasShaderData::BlendMode blend_mode = state.canvas_instance_batches[i].blend_mode; @@ -526,7 +749,7 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou r_last_index += index; } -void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *¤t_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken) { +void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *¤t_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken) { RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? state.default_filter : p_item->texture_filter; if (texture_filter != state.canvas_instance_batches[state.current_batch_index].filter) { @@ -554,6 +777,38 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran bool skipping = false; + // TODO: consider making lights a per-batch property and then baking light operations in the shader for better performance. + uint32_t lights[4] = { 0, 0, 0, 0 }; + + uint16_t light_count = 0; + + { + Light *light = p_lights; + + while (light) { + if (light->render_index_cache >= 0 && p_item->light_mask & light->item_mask && p_item->z_final >= light->z_min && p_item->z_final <= light->z_max && p_item->global_rect_cache.intersects_transformed(light->xform_cache, light->rect_cache)) { + uint32_t light_index = light->render_index_cache; + lights[light_count >> 2] |= light_index << ((light_count & 3) * 8); + + light_count++; + + if (light_count == data.max_lights_per_item) { + break; + } + } + light = light->next_ptr; + } + + base_flags |= light_count << FLAGS_LIGHT_COUNT_SHIFT; + } + + bool lights_disabled = light_count == 0 && !state.using_directional_lights; + + if (lights_disabled != state.canvas_instance_batches[state.current_batch_index].lights_disabled) { + _new_batch(r_batch_broken, r_index); + state.canvas_instance_batches[state.current_batch_index].lights_disabled = lights_disabled; + } + const Item::Command *c = p_item->commands; while (c) { if (skipping && c->type != Item::Command::TYPE_ANIMATION_SLICE) { @@ -580,6 +835,11 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran state.instance_data_array[r_index].pad[0] = 0.0; state.instance_data_array[r_index].pad[1] = 0.0; + state.instance_data_array[r_index].lights[0] = lights[0]; + state.instance_data_array[r_index].lights[1] = lights[1]; + state.instance_data_array[r_index].lights[2] = lights[2]; + state.instance_data_array[r_index].lights[3] = lights[3]; + state.instance_data_array[r_index].flags = base_flags | (state.instance_data_array[r_index == 0 ? 0 : r_index - 1].flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); //reset on each command for sanity, keep canvastexture binding config Color blend_color; @@ -788,13 +1048,15 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran if (primitive->point_count != state.canvas_instance_batches[state.current_batch_index].primitive_points || state.canvas_instance_batches[state.current_batch_index].command_type != Item::Command::TYPE_PRIMITIVE) { _new_batch(r_batch_broken, r_index); - state.canvas_instance_batches[state.current_batch_index].tex = RID(); + state.canvas_instance_batches[state.current_batch_index].tex = primitive->texture; state.canvas_instance_batches[state.current_batch_index].primitive_points = primitive->point_count; state.canvas_instance_batches[state.current_batch_index].command_type = Item::Command::TYPE_PRIMITIVE; state.canvas_instance_batches[state.current_batch_index].command = c; state.canvas_instance_batches[state.current_batch_index].shader_variant = CanvasShaderGLES3::MODE_PRIMITIVE; } + _prepare_canvas_texture(state.canvas_instance_batches[state.current_batch_index].tex, state.canvas_instance_batches[state.current_batch_index].filter, state.canvas_instance_batches[state.current_batch_index].repeat, r_index, texpixel_size); + for (uint32_t j = 0; j < MIN(3u, primitive->point_count); j++) { state.instance_data_array[r_index].points[j * 2 + 0] = primitive->points[j].x; state.instance_data_array[r_index].points[j * 2 + 1] = primitive->points[j].y; @@ -808,7 +1070,7 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran _add_to_batch(r_index, r_batch_broken); if (primitive->point_count == 4) { - // Reset base data + // Reset base data. _update_transform_2d_to_mat2x3(base_transform * draw_transform, state.instance_data_array[r_index].world); state.instance_data_array[r_index].color_texture_pixel_size[0] = 0.0; state.instance_data_array[r_index].color_texture_pixel_size[1] = 0.0; @@ -816,12 +1078,13 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran state.instance_data_array[r_index].flags = base_flags | (state.instance_data_array[r_index - 1].flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); //reset on each command for sanity, keep canvastexture binding config for (uint32_t j = 0; j < 3; j++) { - //second half of triangle - state.instance_data_array[r_index].points[j * 2 + 0] = primitive->points[j + 1].x; - state.instance_data_array[r_index].points[j * 2 + 1] = primitive->points[j + 1].y; - state.instance_data_array[r_index].uvs[j * 2 + 0] = primitive->uvs[j + 1].x; - state.instance_data_array[r_index].uvs[j * 2 + 1] = primitive->uvs[j + 1].y; - Color col = primitive->colors[j + 1] * base_color; + int offset = j == 0 ? 0 : 1; + // Second triangle in the quad. Uses vertices 0, 2, 3. + state.instance_data_array[r_index].points[j * 2 + 0] = primitive->points[j + offset].x; + state.instance_data_array[r_index].points[j * 2 + 1] = primitive->points[j + offset].y; + state.instance_data_array[r_index].uvs[j * 2 + 0] = primitive->uvs[j + offset].x; + state.instance_data_array[r_index].uvs[j * 2 + 1] = primitive->uvs[j + offset].y; + Color col = primitive->colors[j + offset] * base_color; state.instance_data_array[r_index].colors[j * 2 + 0] = (uint32_t(Math::make_half_float(col.g)) << 16) | Math::make_half_float(col.r); state.instance_data_array[r_index].colors[j * 2 + 1] = (uint32_t(Math::make_half_float(col.a)) << 16) | Math::make_half_float(col.b); } @@ -843,15 +1106,45 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran state.canvas_instance_batches[state.current_batch_index].tex = m->texture; _update_transform_2d_to_mat2x3(base_transform * draw_transform * m->transform, state.instance_data_array[r_index].world); modulate = m->modulate; + } else if (c->type == Item::Command::TYPE_MULTIMESH) { const Item::CommandMultiMesh *mm = static_cast<const Item::CommandMultiMesh *>(c); state.canvas_instance_batches[state.current_batch_index].tex = mm->texture; - uint32_t instance_count = GLES3::MeshStorage::get_singleton()->multimesh_get_instances_to_draw(mm->multimesh); - if (instance_count > 1) { - state.canvas_instance_batches[state.current_batch_index].shader_variant = CanvasShaderGLES3::MODE_INSTANCED; - } + state.canvas_instance_batches[state.current_batch_index].shader_variant = CanvasShaderGLES3::MODE_INSTANCED; + } else if (c->type == Item::Command::TYPE_PARTICLES) { - WARN_PRINT_ONCE("Particles not supported yet, sorry :("); + GLES3::ParticlesStorage *particles_storage = GLES3::ParticlesStorage::get_singleton(); + GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); + + const Item::CommandParticles *pt = static_cast<const Item::CommandParticles *>(c); + RID particles = pt->particles; + state.canvas_instance_batches[state.current_batch_index].tex = pt->texture; + state.canvas_instance_batches[state.current_batch_index].shader_variant = CanvasShaderGLES3::MODE_INSTANCED; + bool local_coords = particles_storage->particles_is_using_local_coords(particles); + + if (particles_storage->particles_has_collision(particles) && texture_storage->render_target_is_sdf_enabled(p_render_target)) { + // Pass collision information. + Transform2D xform; + if (local_coords) { + xform = p_item->final_transform; + } else { + xform = p_canvas_transform_inverse; + } + + GLuint sdf_texture = texture_storage->render_target_get_sdf_texture(p_render_target); + + Rect2 to_screen; + { + Rect2 sdf_rect = texture_storage->render_target_get_sdf_rect(p_render_target); + + to_screen.size = Vector2(1.0 / sdf_rect.size.width, 1.0 / sdf_rect.size.height); + to_screen.position = -sdf_rect.position * to_screen.size; + } + + particles_storage->particles_set_canvas_sdf_collision(pt->particles, true, xform, to_screen, sdf_texture); + } else { + particles_storage->particles_set_canvas_sdf_collision(pt->particles, false, Transform2D(), Rect2(), 0); + } } state.canvas_instance_batches[state.current_batch_index].command = c; @@ -988,20 +1281,21 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { case Item::Command::TYPE_MULTIMESH: case Item::Command::TYPE_PARTICLES: { GLES3::MeshStorage *mesh_storage = GLES3::MeshStorage::get_singleton(); + GLES3::ParticlesStorage *particles_storage = GLES3::ParticlesStorage::get_singleton(); RID mesh; RID mesh_instance; - RID texture; uint32_t instance_count = 1; - GLuint multimesh_buffer = 0; - uint32_t multimesh_stride = 0; - uint32_t multimesh_color_offset = 0; - bool multimesh_uses_color = false; - bool multimesh_uses_custom_data = false; + GLuint instance_buffer = 0; + uint32_t instance_stride = 0; + uint32_t instance_color_offset = 0; + bool instance_uses_color = false; + bool instance_uses_custom_data = false; if (state.canvas_instance_batches[p_index].command_type == Item::Command::TYPE_MESH) { const Item::CommandMesh *m = static_cast<const Item::CommandMesh *>(state.canvas_instance_batches[p_index].command); mesh = m->mesh; mesh_instance = m->mesh_instance; + } else if (state.canvas_instance_batches[p_index].command_type == Item::Command::TYPE_MULTIMESH) { const Item::CommandMultiMesh *mm = static_cast<const Item::CommandMultiMesh *>(state.canvas_instance_batches[p_index].command); RID multimesh = mm->multimesh; @@ -1017,13 +1311,37 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { break; } - multimesh_buffer = mesh_storage->multimesh_get_gl_buffer(multimesh); - multimesh_stride = mesh_storage->multimesh_get_stride(multimesh); - multimesh_color_offset = mesh_storage->multimesh_get_color_offset(multimesh); - multimesh_uses_color = mesh_storage->multimesh_uses_colors(multimesh); - multimesh_uses_custom_data = mesh_storage->multimesh_uses_custom_data(multimesh); + instance_buffer = mesh_storage->multimesh_get_gl_buffer(multimesh); + instance_stride = mesh_storage->multimesh_get_stride(multimesh); + instance_color_offset = mesh_storage->multimesh_get_color_offset(multimesh); + instance_uses_color = mesh_storage->multimesh_uses_colors(multimesh); + instance_uses_custom_data = mesh_storage->multimesh_uses_custom_data(multimesh); + } else if (state.canvas_instance_batches[p_index].command_type == Item::Command::TYPE_PARTICLES) { - // Do nothing for now. + const Item::CommandParticles *pt = static_cast<const Item::CommandParticles *>(state.canvas_instance_batches[p_index].command); + RID particles = pt->particles; + mesh = particles_storage->particles_get_draw_pass_mesh(particles, 0); + + ERR_BREAK(particles_storage->particles_get_mode(particles) != RS::PARTICLES_MODE_2D); + particles_storage->particles_request_process(particles); + + if (particles_storage->particles_is_inactive(particles)) { + break; + } + + RenderingServerDefault::redraw_request(); // Active particles means redraw request. + + int dpc = particles_storage->particles_get_draw_passes(particles); + if (dpc == 0) { + break; // Nothing to draw. + } + + instance_count = particles_storage->particles_get_amount(particles); + instance_buffer = particles_storage->particles_get_gl_buffer(particles); + instance_stride = 12; // 8 bytes for instance transform and 4 bytes for packed color and custom. + instance_color_offset = 8; // 8 bytes for instance transform. + instance_uses_color = true; + instance_uses_custom_data = true; } ERR_FAIL_COND(mesh.is_null()); @@ -1055,18 +1373,21 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { } if (instance_count > 1) { + if (instance_buffer == 0) { + break; + } // Bind instance buffers. - glBindBuffer(GL_ARRAY_BUFFER, multimesh_buffer); + glBindBuffer(GL_ARRAY_BUFFER, instance_buffer); glEnableVertexAttribArray(1); - glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0)); + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, instance_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0)); glVertexAttribDivisor(1, 1); glEnableVertexAttribArray(2); - glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 4)); + glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, instance_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 4)); glVertexAttribDivisor(2, 1); - if (multimesh_uses_color || multimesh_uses_custom_data) { + if (instance_uses_color || instance_uses_custom_data) { glEnableVertexAttribArray(5); - glVertexAttribIPointer(5, 4, GL_UNSIGNED_INT, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(multimesh_color_offset * sizeof(float))); + glVertexAttribIPointer(5, 4, GL_UNSIGNED_INT, instance_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(instance_color_offset * sizeof(float))); glVertexAttribDivisor(5, 1); } } @@ -1106,7 +1427,7 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { void RasterizerCanvasGLES3::_add_to_batch(uint32_t &r_index, bool &r_batch_broken) { if (r_index >= data.max_instances_per_ubo - 1) { - WARN_PRINT_ONCE("Trying to draw too many items. Please increase maximum number of items in the project settings 'rendering/gl_compatibility/item_buffer_size'"); + ERR_PRINT_ONCE("Trying to draw too many items. Please increase maximum number of items in the project settings 'rendering/gl_compatibility/item_buffer_size'"); return; } @@ -1140,53 +1461,577 @@ void RasterizerCanvasGLES3::_new_batch(bool &r_batch_broken, uint32_t &r_index) _align_instance_data_buffer(r_index); } -void RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant) { +bool RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization) { if (p_material_data) { if (p_material_data->shader_data->version.is_valid() && p_material_data->shader_data->valid) { // Bind uniform buffer and textures p_material_data->bind_uniforms(); - GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant); + return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant, p_specialization); } else { - GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant); + return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization); } } else { - GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant); + return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization); } } RID RasterizerCanvasGLES3::light_create() { - return RID(); + CanvasLight canvas_light; + return canvas_light_owner.make_rid(canvas_light); } void RasterizerCanvasGLES3::light_set_texture(RID p_rid, RID p_texture) { + GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); + + CanvasLight *cl = canvas_light_owner.get_or_null(p_rid); + ERR_FAIL_COND(!cl); + if (cl->texture == p_texture) { + return; + } + if (cl->texture.is_valid()) { + texture_storage->texture_remove_from_texture_atlas(cl->texture); + } + cl->texture = p_texture; + + if (cl->texture.is_valid()) { + texture_storage->texture_add_to_texture_atlas(cl->texture); + } } void RasterizerCanvasGLES3::light_set_use_shadow(RID p_rid, bool p_enable) { + CanvasLight *cl = canvas_light_owner.get_or_null(p_rid); + ERR_FAIL_COND(!cl); + + cl->shadow.enabled = p_enable; } void RasterizerCanvasGLES3::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) { + GLES3::Config *config = GLES3::Config::get_singleton(); + + CanvasLight *cl = canvas_light_owner.get_or_null(p_rid); + ERR_FAIL_COND(!cl->shadow.enabled); + + _update_shadow_atlas(); + + cl->shadow.z_far = p_far; + cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(data.max_lights_per_render * 2); + + glBindFramebuffer(GL_FRAMEBUFFER, state.shadow_fb); + glViewport(0, p_shadow_index * 2, state.shadow_texture_size, 2); + + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + glDisable(GL_BLEND); + + glEnable(GL_SCISSOR_TEST); + glScissor(0, p_shadow_index * 2, state.shadow_texture_size, 2); + glClearColor(p_far, p_far, p_far, 1.0); + glClearDepth(1.0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glCullFace(GL_BACK); + glDisable(GL_CULL_FACE); + RS::CanvasOccluderPolygonCullMode cull_mode = RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; + + CanvasOcclusionShaderGLES3::ShaderVariant variant = config->float_texture_supported ? CanvasOcclusionShaderGLES3::MODE_SHADOW : CanvasOcclusionShaderGLES3::MODE_SHADOW_RGBA; + bool success = shadow_render.shader.version_bind_shader(shadow_render.shader_version, variant); + if (!success) { + return; + } + + for (int i = 0; i < 4; i++) { + glViewport((state.shadow_texture_size / 4) * i, p_shadow_index * 2, (state.shadow_texture_size / 4), 2); + + Projection projection; + { + real_t fov = 90; + real_t nearp = p_near; + real_t farp = p_far; + real_t aspect = 1.0; + + real_t ymax = nearp * Math::tan(Math::deg_to_rad(fov * 0.5)); + real_t ymin = -ymax; + real_t xmin = ymin * aspect; + real_t xmax = ymax * aspect; + + projection.set_frustum(xmin, xmax, ymin, ymax, nearp, farp); + } + + Vector3 cam_target = Basis::from_euler(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0)); + + projection = projection * Projection(Transform3D().looking_at(cam_target, Vector3(0, 0, -1)).affine_inverse()); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::PROJECTION, projection, shadow_render.shader_version, variant); + + static const Vector2 directions[4] = { Vector2(1, 0), Vector2(0, 1), Vector2(-1, 0), Vector2(0, -1) }; + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::DIRECTION, directions[i].x, directions[i].y, shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::Z_FAR, p_far, shadow_render.shader_version, variant); + + LightOccluderInstance *instance = p_occluders; + + while (instance) { + OccluderPolygon *co = occluder_polygon_owner.get_or_null(instance->occluder); + + if (!co || co->vertex_array == 0 || !(p_light_mask & instance->light_mask)) { + instance = instance->next; + continue; + } + + Transform2D modelview = p_light_xform * instance->xform_cache; + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::MODELVIEW1, modelview.columns[0][0], modelview.columns[1][0], 0, modelview.columns[2][0], shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::MODELVIEW2, modelview.columns[0][1], modelview.columns[1][1], 0, modelview.columns[2][1], shadow_render.shader_version, variant); + + if (co->cull_mode != cull_mode) { + if (co->cull_mode == RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED) { + glDisable(GL_CULL_FACE); + } else { + if (cull_mode == RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED) { + // Last time was disabled, so enable and set proper face. + glEnable(GL_CULL_FACE); + } + glCullFace(co->cull_mode == RS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE ? GL_FRONT : GL_BACK); + } + cull_mode = co->cull_mode; + } + + glBindVertexArray(co->vertex_array); + glDrawElements(GL_TRIANGLES, 3 * co->line_point_count, GL_UNSIGNED_SHORT, 0); + + instance = instance->next; + } + } + + glBindVertexArray(0); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); } void RasterizerCanvasGLES3::light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) { + GLES3::Config *config = GLES3::Config::get_singleton(); + + CanvasLight *cl = canvas_light_owner.get_or_null(p_rid); + ERR_FAIL_COND(!cl->shadow.enabled); + + _update_shadow_atlas(); + + Vector2 light_dir = p_light_xform.columns[1].normalized(); + + Vector2 center = p_clip_rect.get_center(); + + float to_edge_distance = ABS(light_dir.dot(p_clip_rect.get_support(light_dir)) - light_dir.dot(center)); + + Vector2 from_pos = center - light_dir * (to_edge_distance + p_cull_distance); + float distance = to_edge_distance * 2.0 + p_cull_distance; + float half_size = p_clip_rect.size.length() * 0.5; //shadow length, must keep this no matter the angle + + cl->shadow.z_far = distance; + cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(data.max_lights_per_render * 2); + + Transform2D to_light_xform; + + to_light_xform[2] = from_pos; + to_light_xform[1] = light_dir; + to_light_xform[0] = -light_dir.orthogonal(); + + to_light_xform.invert(); + + glBindFramebuffer(GL_FRAMEBUFFER, state.shadow_fb); + glViewport(0, p_shadow_index * 2, state.shadow_texture_size, 2); + + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + glDisable(GL_BLEND); + + glEnable(GL_SCISSOR_TEST); + glScissor(0, p_shadow_index * 2, state.shadow_texture_size, 2); + glClearColor(1.0, 1.0, 1.0, 1.0); + glClearDepth(1.0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glCullFace(GL_BACK); + glDisable(GL_CULL_FACE); + RS::CanvasOccluderPolygonCullMode cull_mode = RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; + + CanvasOcclusionShaderGLES3::ShaderVariant variant = config->float_texture_supported ? CanvasOcclusionShaderGLES3::MODE_SHADOW : CanvasOcclusionShaderGLES3::MODE_SHADOW_RGBA; + bool success = shadow_render.shader.version_bind_shader(shadow_render.shader_version, variant); + if (!success) { + return; + } + + Projection projection; + projection.set_orthogonal(-half_size, half_size, -0.5, 0.5, 0.0, distance); + projection = projection * Projection(Transform3D().looking_at(Vector3(0, 1, 0), Vector3(0, 0, -1)).affine_inverse()); + + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::PROJECTION, projection, shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::DIRECTION, 0.0, 1.0, shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::Z_FAR, distance, shadow_render.shader_version, variant); + + LightOccluderInstance *instance = p_occluders; + + while (instance) { + OccluderPolygon *co = occluder_polygon_owner.get_or_null(instance->occluder); + + if (!co || co->vertex_array == 0 || !(p_light_mask & instance->light_mask)) { + instance = instance->next; + continue; + } + + Transform2D modelview = to_light_xform * instance->xform_cache; + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::MODELVIEW1, modelview.columns[0][0], modelview.columns[1][0], 0, modelview.columns[2][0], shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::MODELVIEW2, modelview.columns[0][1], modelview.columns[1][1], 0, modelview.columns[2][1], shadow_render.shader_version, variant); + + if (co->cull_mode != cull_mode) { + if (co->cull_mode == RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED) { + glDisable(GL_CULL_FACE); + } else { + if (cull_mode == RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED) { + // Last time was disabled, so enable and set proper face. + glEnable(GL_CULL_FACE); + } + glCullFace(co->cull_mode == RS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE ? GL_FRONT : GL_BACK); + } + cull_mode = co->cull_mode; + } + + glBindVertexArray(co->vertex_array); + glDrawElements(GL_TRIANGLES, 3 * co->line_point_count, GL_UNSIGNED_SHORT, 0); + + instance = instance->next; + } + + Transform2D to_shadow; + to_shadow.columns[0].x = 1.0 / -(half_size * 2.0); + to_shadow.columns[2].x = 0.5; + + cl->shadow.directional_xform = to_shadow * to_light_xform; + + glBindVertexArray(0); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_CULL_FACE); +} + +void RasterizerCanvasGLES3::_update_shadow_atlas() { + GLES3::Config *config = GLES3::Config::get_singleton(); + + if (state.shadow_fb == 0) { + glActiveTexture(GL_TEXTURE0); + + glGenFramebuffers(1, &state.shadow_fb); + glBindFramebuffer(GL_FRAMEBUFFER, state.shadow_fb); + + glGenRenderbuffers(1, &state.shadow_depth_buffer); + glBindRenderbuffer(GL_RENDERBUFFER, state.shadow_depth_buffer); + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, state.shadow_texture_size, data.max_lights_per_render * 2); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, state.shadow_depth_buffer); + + glGenTextures(1, &state.shadow_texture); + glBindTexture(GL_TEXTURE_2D, state.shadow_texture); + if (config->float_texture_supported) { + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, state.shadow_texture_size, data.max_lights_per_render * 2, 0, GL_RED, GL_FLOAT, nullptr); + } else { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, state.shadow_texture_size, data.max_lights_per_render * 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + } + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, state.shadow_texture, 0); + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + glDeleteFramebuffers(1, &state.shadow_fb); + glDeleteTextures(1, &state.shadow_texture); + glDeleteRenderbuffers(1, &state.shadow_depth_buffer); + state.shadow_fb = 0; + state.shadow_texture = 0; + state.shadow_depth_buffer = 0; + WARN_PRINT("Could not create CanvasItem shadow atlas, status: " + GLES3::TextureStorage::get_singleton()->get_framebuffer_error(status)); + } + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); + } } void RasterizerCanvasGLES3::render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) { + GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); + + GLuint fb = texture_storage->render_target_get_sdf_framebuffer(p_render_target); + Rect2i rect = texture_storage->render_target_get_sdf_rect(p_render_target); + + Transform2D to_sdf; + to_sdf.columns[0] *= rect.size.width; + to_sdf.columns[1] *= rect.size.height; + to_sdf.columns[2] = rect.position; + + Transform2D to_clip; + to_clip.columns[0] *= 2.0; + to_clip.columns[1] *= 2.0; + to_clip.columns[2] = -Vector2(1.0, 1.0); + + to_clip = to_clip * to_sdf.affine_inverse(); + + glBindFramebuffer(GL_FRAMEBUFFER, fb); + glViewport(0, 0, rect.size.width, rect.size.height); + + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_BLEND); + glDisable(GL_CULL_FACE); + glDisable(GL_SCISSOR_TEST); + + glClearColor(0.0, 0.0, 0.0, 0.0); + glClear(GL_COLOR_BUFFER_BIT); + + CanvasOcclusionShaderGLES3::ShaderVariant variant = CanvasOcclusionShaderGLES3::MODE_SDF; + bool success = shadow_render.shader.version_bind_shader(shadow_render.shader_version, variant); + if (!success) { + return; + } + + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::PROJECTION, Projection(), shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::DIRECTION, 0.0, 0.0, shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::Z_FAR, 0.0, shadow_render.shader_version, variant); + + LightOccluderInstance *instance = p_occluders; + + while (instance) { + OccluderPolygon *oc = occluder_polygon_owner.get_or_null(instance->occluder); + + if (!oc || oc->sdf_vertex_array == 0) { + instance = instance->next; + continue; + } + + Transform2D modelview = to_clip * instance->xform_cache; + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::MODELVIEW1, modelview.columns[0][0], modelview.columns[1][0], 0, modelview.columns[2][0], shadow_render.shader_version, variant); + shadow_render.shader.version_set_uniform(CanvasOcclusionShaderGLES3::MODELVIEW2, modelview.columns[0][1], modelview.columns[1][1], 0, modelview.columns[2][1], shadow_render.shader_version, variant); + + glBindVertexArray(oc->sdf_vertex_array); + glDrawElements(oc->sdf_is_lines ? GL_LINES : GL_TRIANGLES, oc->sdf_index_count, GL_UNSIGNED_INT, 0); + + instance = instance->next; + } + + texture_storage->render_target_sdf_process(p_render_target); //done rendering, process it + glBindVertexArray(0); + glBindFramebuffer(GL_FRAMEBUFFER, 0); } RID RasterizerCanvasGLES3::occluder_polygon_create() { - return RID(); + OccluderPolygon occluder; + + return occluder_polygon_owner.make_rid(occluder); } void RasterizerCanvasGLES3::occluder_polygon_set_shape(RID p_occluder, const Vector<Vector2> &p_points, bool p_closed) { + OccluderPolygon *oc = occluder_polygon_owner.get_or_null(p_occluder); + ERR_FAIL_COND(!oc); + + Vector<Vector2> lines; + + if (p_points.size()) { + int lc = p_points.size() * 2; + + lines.resize(lc - (p_closed ? 0 : 2)); + { + Vector2 *w = lines.ptrw(); + const Vector2 *r = p_points.ptr(); + + int max = lc / 2; + if (!p_closed) { + max--; + } + for (int i = 0; i < max; i++) { + Vector2 a = r[i]; + Vector2 b = r[(i + 1) % (lc / 2)]; + w[i * 2 + 0] = a; + w[i * 2 + 1] = b; + } + } + } + + if (oc->line_point_count != lines.size() && oc->vertex_array != 0) { + glDeleteVertexArrays(1, &oc->vertex_array); + glDeleteBuffers(1, &oc->vertex_buffer); + glDeleteBuffers(1, &oc->index_buffer); + + oc->vertex_array = 0; + oc->vertex_buffer = 0; + oc->index_buffer = 0; + } + + if (lines.size()) { + Vector<uint8_t> geometry; + Vector<uint8_t> indices; + int lc = lines.size(); + + geometry.resize(lc * 6 * sizeof(float)); + indices.resize(lc * 3 * sizeof(uint16_t)); + + { + uint8_t *vw = geometry.ptrw(); + float *vwptr = reinterpret_cast<float *>(vw); + uint8_t *iw = indices.ptrw(); + uint16_t *iwptr = (uint16_t *)iw; + + const Vector2 *lr = lines.ptr(); + + const int POLY_HEIGHT = 16384; + + for (int i = 0; i < lc / 2; i++) { + vwptr[i * 12 + 0] = lr[i * 2 + 0].x; + vwptr[i * 12 + 1] = lr[i * 2 + 0].y; + vwptr[i * 12 + 2] = POLY_HEIGHT; + + vwptr[i * 12 + 3] = lr[i * 2 + 1].x; + vwptr[i * 12 + 4] = lr[i * 2 + 1].y; + vwptr[i * 12 + 5] = POLY_HEIGHT; + + vwptr[i * 12 + 6] = lr[i * 2 + 1].x; + vwptr[i * 12 + 7] = lr[i * 2 + 1].y; + vwptr[i * 12 + 8] = -POLY_HEIGHT; + + vwptr[i * 12 + 9] = lr[i * 2 + 0].x; + vwptr[i * 12 + 10] = lr[i * 2 + 0].y; + vwptr[i * 12 + 11] = -POLY_HEIGHT; + + iwptr[i * 6 + 0] = i * 4 + 0; + iwptr[i * 6 + 1] = i * 4 + 1; + iwptr[i * 6 + 2] = i * 4 + 2; + + iwptr[i * 6 + 3] = i * 4 + 2; + iwptr[i * 6 + 4] = i * 4 + 3; + iwptr[i * 6 + 5] = i * 4 + 0; + } + } + + if (oc->vertex_array == 0) { + oc->line_point_count = lc; + glGenVertexArrays(1, &oc->vertex_array); + glBindVertexArray(oc->vertex_array); + glGenBuffers(1, &oc->vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, oc->vertex_buffer); + + glBufferData(GL_ARRAY_BUFFER, lc * 6 * sizeof(float), geometry.ptr(), GL_STATIC_DRAW); + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glVertexAttribPointer(RS::ARRAY_VERTEX, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), nullptr); + + glGenBuffers(1, &oc->index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, oc->index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, 3 * lc * sizeof(uint16_t), indices.ptr(), GL_STATIC_DRAW); + glBindVertexArray(0); + } else { + glBindVertexArray(oc->vertex_array); + glBindBuffer(GL_ARRAY_BUFFER, oc->vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, lc * 6 * sizeof(float), geometry.ptr(), GL_STATIC_DRAW); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, oc->index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, 3 * lc * sizeof(uint16_t), indices.ptr(), GL_STATIC_DRAW); + } + } + + // sdf + + Vector<int> sdf_indices; + + if (p_points.size()) { + if (p_closed) { + sdf_indices = Geometry2D::triangulate_polygon(p_points); + oc->sdf_is_lines = false; + } else { + int max = p_points.size(); + sdf_indices.resize(max * 2); + + int *iw = sdf_indices.ptrw(); + for (int i = 0; i < max; i++) { + iw[i * 2 + 0] = i; + iw[i * 2 + 1] = (i + 1) % max; + } + oc->sdf_is_lines = true; + } + } + + if (oc->sdf_index_count != sdf_indices.size() && oc->sdf_point_count != p_points.size() && oc->sdf_vertex_array != 0) { + glDeleteVertexArrays(1, &oc->sdf_vertex_array); + glDeleteBuffers(1, &oc->sdf_vertex_buffer); + glDeleteBuffers(1, &oc->sdf_index_buffer); + + oc->sdf_vertex_array = 0; + oc->sdf_vertex_buffer = 0; + oc->sdf_index_buffer = 0; + + oc->sdf_index_count = sdf_indices.size(); + oc->sdf_point_count = p_points.size(); + } + + if (sdf_indices.size()) { + if (oc->sdf_vertex_array == 0) { + oc->sdf_index_count = sdf_indices.size(); + oc->sdf_point_count = p_points.size(); + glGenVertexArrays(1, &oc->sdf_vertex_array); + glBindVertexArray(oc->sdf_vertex_array); + glGenBuffers(1, &oc->sdf_vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, oc->sdf_vertex_buffer); + + glBufferData(GL_ARRAY_BUFFER, p_points.size() * 2 * sizeof(float), p_points.to_byte_array().ptr(), GL_STATIC_DRAW); + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glVertexAttribPointer(RS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), nullptr); + + glGenBuffers(1, &oc->sdf_index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, oc->sdf_index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sdf_indices.size() * sizeof(uint32_t), sdf_indices.to_byte_array().ptr(), GL_STATIC_DRAW); + glBindVertexArray(0); + } else { + glBindBuffer(GL_ARRAY_BUFFER, oc->sdf_vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, p_points.size() * 2 * sizeof(float), p_points.to_byte_array().ptr(), GL_STATIC_DRAW); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, oc->sdf_index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sdf_indices.size() * sizeof(uint32_t), sdf_indices.to_byte_array().ptr(), GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } + } } void RasterizerCanvasGLES3::occluder_polygon_set_cull_mode(RID p_occluder, RS::CanvasOccluderPolygonCullMode p_mode) { + OccluderPolygon *oc = occluder_polygon_owner.get_or_null(p_occluder); + ERR_FAIL_COND(!oc); + oc->cull_mode = p_mode; } void RasterizerCanvasGLES3::set_shadow_texture_size(int p_size) { + GLES3::Config *config = GLES3::Config::get_singleton(); + p_size = nearest_power_of_2_templated(p_size); + if (p_size == state.shadow_texture_size) { + return; + } + + if (p_size > config->max_texture_size) { + p_size = config->max_texture_size; + WARN_PRINT("Attempting to set CanvasItem shadow atlas size to " + itos(p_size) + " which is beyond limit of " + itos(config->max_texture_size) + "supported by hardware."); + } + + state.shadow_texture_size = p_size; } bool RasterizerCanvasGLES3::free(RID p_rid) { + if (canvas_light_owner.owns(p_rid)) { + CanvasLight *cl = canvas_light_owner.get_or_null(p_rid); + ERR_FAIL_COND_V(!cl, false); + canvas_light_owner.free(p_rid); + } else if (occluder_polygon_owner.owns(p_rid)) { + occluder_polygon_set_shape(p_rid, Vector<Vector2>(), false); + occluder_polygon_owner.free(p_rid); + } else { + return false; + } + return true; } @@ -1357,7 +2202,7 @@ void RasterizerCanvasGLES3::_prepare_canvas_texture(RID p_texture, RS::CanvasIte state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_SPECULAR_MAP_USED; } - if (!normal_map) { + if (normal_map) { state.instance_data_array[r_index].flags |= FLAGS_DEFAULT_NORMAL_MAP_USED; } else { state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_NORMAL_MAP_USED; @@ -1382,16 +2227,16 @@ void RasterizerCanvasGLES3::reset_canvas() { glEnable(GL_BLEND); glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); + glActiveTexture(GL_TEXTURE0 + GLES3::Config::get_singleton()->max_texture_image_units - 2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0 + GLES3::Config::get_singleton()->max_texture_image_units - 3); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } -void RasterizerCanvasGLES3::canvas_debug_viewport_shadows(Light *p_lights_with_shadow) { -} - -void RasterizerCanvasGLES3::canvas_light_shadow_buffer_update(RID p_buffer, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, Projection *p_xform_cache) { -} - void RasterizerCanvasGLES3::draw_lens_distortion_rect(const Rect2 &p_rect, float p_k1, float p_k2, const Vector2 &p_eye_center, float p_oversample) { } @@ -1422,7 +2267,6 @@ RendererCanvasRender::PolygonID RasterizerCanvasGLES3::request_polygon(const Vec polygon_buffer.resize(buffer_size * sizeof(float)); { glBindBuffer(GL_ARRAY_BUFFER, pb.vertex_buffer); - glBufferData(GL_ARRAY_BUFFER, stride * vertex_count * sizeof(float), nullptr, GL_STATIC_DRAW); // TODO may not be necessary uint8_t *r = polygon_buffer.ptrw(); float *fptr = reinterpret_cast<float *>(r); uint32_t *uptr = (uint32_t *)r; @@ -1531,7 +2375,6 @@ RendererCanvasRender::PolygonID RasterizerCanvasGLES3::request_polygon(const Vec } glGenBuffers(1, &pb.index_buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, pb.index_buffer); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, p_indices.size() * 4, nullptr, GL_STATIC_DRAW); // TODO may not be necessary glBufferData(GL_ELEMENT_ARRAY_BUFFER, p_indices.size() * 4, index_buffer.ptr(), GL_STATIC_DRAW); pb.count = p_indices.size(); } @@ -1567,13 +2410,23 @@ void RasterizerCanvasGLES3::free_polygon(PolygonID p_polygon) { // In theory allocations can reach as high as number of windows * 3 frames // because OpenGL can start rendering subsequent frames before finishing the current one void RasterizerCanvasGLES3::_allocate_instance_data_buffer() { - GLuint new_buffer; - glGenBuffers(1, &new_buffer); - glBindBuffer(GL_UNIFORM_BUFFER, new_buffer); - glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_DYNAMIC_DRAW); + GLuint new_buffers[3]; + glGenBuffers(3, new_buffers); + // Batch UBO. + glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]); + glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW); + // Light uniform buffer. + glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]); + glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW); + // State buffer. + glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]); + glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW); + state.current_buffer = (state.current_buffer + 1); DataBuffer db; - db.ubo = new_buffer; + db.ubo = new_buffers[0]; + db.light_ubo = new_buffers[1]; + db.state_ubo = new_buffers[2]; db.last_frame_used = RSG::rasterizer->get_frame_number(); state.canvas_instance_data_buffers.insert(state.current_buffer, db); state.current_buffer = state.current_buffer % state.canvas_instance_data_buffers.size(); @@ -1755,12 +2608,21 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() { state.canvas_instance_batches.reserve(200); for (int i = 0; i < 3; i++) { - GLuint new_buffer; - glGenBuffers(1, &new_buffer); - glBindBuffer(GL_UNIFORM_BUFFER, new_buffer); - glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_DYNAMIC_DRAW); + GLuint new_buffers[3]; + glGenBuffers(3, new_buffers); + // Batch UBO. + glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]); + glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW); + // Light uniform buffer. + glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]); + glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW); + // State buffer. + glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]); + glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW); DataBuffer db; - db.ubo = new_buffer; + db.ubo = new_buffers[0]; + db.light_ubo = new_buffers[1]; + db.state_ubo = new_buffers[2]; db.last_frame_used = 0; db.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); state.canvas_instance_data_buffers[i] = db; @@ -1768,6 +2630,7 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() { glBindBuffer(GL_UNIFORM_BUFFER, 0); state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_ubo); + state.light_uniforms = memnew_arr(LightUniform, data.max_lights_per_render); { const uint32_t no_of_instances = data.max_instances_per_batch; @@ -1794,19 +2657,16 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() { delete[] indices; } - glGenBuffers(1, &state.canvas_state_buffer); - glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_state_buffer); - glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW); - glBindBuffer(GL_UNIFORM_BUFFER, 0); - String global_defines; global_defines += "#define MAX_GLOBAL_SHADER_UNIFORMS 256\n"; // TODO: this is arbitrary for now - global_defines += "#define MAX_LIGHTS " + itos(data.max_instances_per_batch) + "\n"; + global_defines += "#define MAX_LIGHTS " + itos(data.max_lights_per_render) + "\n"; global_defines += "#define MAX_DRAW_DATA_INSTANCES " + itos(data.max_instances_per_batch) + "\n"; GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.initialize(global_defines); data.canvas_shader_default_version = GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_create(); - GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, CanvasShaderGLES3::MODE_QUAD); + + shadow_render.shader.initialize(); + shadow_render.shader_version = shadow_render.shader.version_create(); { default_canvas_group_shader = material_storage->shader_allocate(); @@ -1833,6 +2693,26 @@ void fragment() { material_storage->material_set_shader(default_canvas_group_material, default_canvas_group_shader); } + { + default_clip_children_shader = material_storage->shader_allocate(); + material_storage->shader_initialize(default_clip_children_shader); + + material_storage->shader_set_code(default_clip_children_shader, R"( +// Default clip children shader. + +shader_type canvas_item; + +void fragment() { + vec4 c = textureLod(SCREEN_TEXTURE, SCREEN_UV, 0.0); + COLOR.rgb = c.rgb; +} +)"); + default_clip_children_material = material_storage->material_allocate(); + material_storage->material_initialize(default_clip_children_material); + + material_storage->material_set_shader(default_clip_children_material, default_clip_children_shader); + } + default_canvas_texture = texture_storage->canvas_texture_allocate(); texture_storage->canvas_texture_initialize(default_canvas_texture); @@ -1840,11 +2720,15 @@ void fragment() { } RasterizerCanvasGLES3::~RasterizerCanvasGLES3() { - GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); + singleton = nullptr; + GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); material_storage->shaders.canvas_shader.version_free(data.canvas_shader_default_version); + shadow_render.shader.version_free(shadow_render.shader_version); material_storage->material_free(default_canvas_group_material); material_storage->shader_free(default_canvas_group_shader); + material_storage->material_free(default_clip_children_material); + material_storage->shader_free(default_clip_children_shader); singleton = nullptr; glDeleteBuffers(1, &data.canvas_quad_vertices); @@ -1854,7 +2738,17 @@ RasterizerCanvasGLES3::~RasterizerCanvasGLES3() { glDeleteVertexArrays(1, &data.canvas_quad_array); GLES3::TextureStorage::get_singleton()->canvas_texture_free(default_canvas_texture); - memfree(state.instance_data_array); + memdelete_arr(state.instance_data_array); + memdelete_arr(state.light_uniforms); + + if (state.shadow_fb != 0) { + glDeleteFramebuffers(1, &state.shadow_fb); + glDeleteTextures(1, &state.shadow_texture); + glDeleteRenderbuffers(1, &state.shadow_depth_buffer); + state.shadow_fb = 0; + state.shadow_texture = 0; + state.shadow_depth_buffer = 0; + } } #endif // GLES3_ENABLED diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index 15c2ca5710..0a03d43d07 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -40,6 +40,7 @@ #include "storage/texture_storage.h" #include "shaders/canvas.glsl.gen.h" +#include "shaders/canvas_occlusion.glsl.gen.h" class RasterizerSceneGLES3; @@ -96,6 +97,63 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender { DEFAULT_MAX_LIGHTS_PER_RENDER = 256, }; + /******************/ + /**** LIGHTING ****/ + /******************/ + + struct CanvasLight { + RID texture; + struct { + bool enabled = false; + float z_far; + float y_offset; + Transform2D directional_xform; + } shadow; + }; + + RID_Owner<CanvasLight> canvas_light_owner; + + struct OccluderPolygon { + RS::CanvasOccluderPolygonCullMode cull_mode = RS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; + int line_point_count = 0; + GLuint vertex_buffer = 0; + GLuint vertex_array = 0; + GLuint index_buffer = 0; + + int sdf_point_count = 0; + int sdf_index_count = 0; + GLuint sdf_vertex_buffer = 0; + GLuint sdf_vertex_array = 0; + GLuint sdf_index_buffer = 0; + bool sdf_is_lines = false; + }; + + RID_Owner<OccluderPolygon> occluder_polygon_owner; + + void _update_shadow_atlas(); + + struct { + CanvasOcclusionShaderGLES3 shader; + RID shader_version; + } shadow_render; + + struct LightUniform { + float matrix[8]; //light to texture coordinate matrix + float shadow_matrix[8]; //light to shadow coordinate matrix + float color[4]; + + uint8_t shadow_color[4]; + uint32_t flags; //index to light texture + float shadow_pixel_size; + float height; + + float position[2]; + float shadow_z_far_inv; + float shadow_y_ofs; + + float atlas_rect[4]; + }; + public: enum { BASE_UNIFORM_LOCATION = 0, @@ -126,9 +184,9 @@ public: }; struct PolygonBuffers { - GLuint vertex_buffer; - GLuint vertex_array; - GLuint index_buffer; + GLuint vertex_buffer = 0; + GLuint vertex_array = 0; + GLuint index_buffer = 0; int count = 0; bool color_disabled = false; Color color; @@ -184,8 +242,8 @@ public: RID canvas_shader_default_version; - uint32_t max_lights_per_render; - uint32_t max_lights_per_item; + uint32_t max_lights_per_render = 256; + uint32_t max_lights_per_item = 16; uint32_t max_instances_per_batch = 512; uint32_t max_instances_per_ubo = 16384; uint32_t max_instance_buffer_size = 16384 * 128; @@ -196,7 +254,7 @@ public: uint32_t start = 0; uint32_t instance_count = 0; - RID tex = RID(); + RID tex; RS::CanvasItemTextureFilter filter = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX; RS::CanvasItemTextureRepeat repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX; @@ -205,23 +263,29 @@ public: Item *clip = nullptr; - RID material = RID(); + RID material; GLES3::CanvasMaterialData *material_data = nullptr; CanvasShaderGLES3::ShaderVariant shader_variant = CanvasShaderGLES3::MODE_QUAD; const Item::Command *command = nullptr; Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch. uint32_t primitive_points = 0; + + bool lights_disabled = false; }; + // DataBuffer contains our per-frame data. I.e. the resources that are updated each frame. + // We track them and ensure that they don't get reused until at least 2 frames have passed + // to avoid the GPU stalling to wait for a resource to become available. struct DataBuffer { GLuint ubo = 0; + GLuint light_ubo = 0; + GLuint state_ubo = 0; uint64_t last_frame_used = -3; GLsync fence = GLsync(); }; struct State { - GLuint canvas_state_buffer; LocalVector<DataBuffer> canvas_instance_data_buffers; LocalVector<Batch> canvas_instance_batches; uint32_t current_buffer = 0; @@ -230,7 +294,16 @@ public: InstanceData *instance_data_array = nullptr; - RID current_tex = RID(); + LightUniform *light_uniforms = nullptr; + + GLuint shadow_texture = 0; + GLuint shadow_depth_buffer = 0; + GLuint shadow_fb = 0; + int shadow_texture_size = 2048; + + bool using_directional_lights = false; + + RID current_tex; RS::CanvasItemTextureFilter current_filter_mode = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX; RS::CanvasItemTextureRepeat current_repeat_mode = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX; @@ -247,6 +320,8 @@ public: RID default_canvas_texture; RID default_canvas_group_material; RID default_canvas_group_shader; + RID default_clip_children_material; + RID default_clip_children_shader; typedef void Texture; @@ -256,9 +331,6 @@ public: void draw_lens_distortion_rect(const Rect2 &p_rect, float p_k1, float p_k2, const Vector2 &p_eye_center, float p_oversample); void reset_canvas(); - void canvas_light_shadow_buffer_update(RID p_buffer, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, Projection *p_xform_cache); - - virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow) override; RID light_create() override; void light_set_texture(RID p_rid, RID p_texture) override; @@ -280,9 +352,9 @@ public: void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) override; void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, uint32_t &r_last_index, bool p_to_backbuffer = false); - void _record_item_commands(const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *¤t_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch); + void _record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *¤t_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch); void _render_batch(Light *p_lights, uint32_t p_index); - void _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant); + bool _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization); void _new_batch(bool &r_batch_broken, uint32_t &r_index); void _add_to_batch(uint32_t &r_index, bool &r_batch_broken); void _allocate_instance_data_buffer(); diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index 0836a21254..1b42b55425 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -101,8 +101,7 @@ void RasterizerGLES3::begin_frame(double frame_step) { scene->set_time(time_total, frame_step); GLES3::Utilities *utils = GLES3::Utilities::get_singleton(); - utils->info.render_final = utils->info.render; - utils->info.render.reset(); + utils->_capture_timestamps_begin(); //scene->iteration(); } @@ -200,7 +199,7 @@ void RasterizerGLES3::finalize() { RasterizerGLES3::RasterizerGLES3() { #ifdef GLAD_ENABLED - if (!gladLoadGL()) { + if (!gladLoaderLoadGL()) { ERR_PRINT("Error initializing GLAD"); // FIXME this is an early return from a constructor. Any other code using this instance will crash or the finalizer will crash, because none of // the members of this instance are initialized, so this just makes debugging harder. It should either crash here intentionally, @@ -272,17 +271,41 @@ RasterizerGLES3::~RasterizerGLES3() { } void RasterizerGLES3::prepare_for_blitting_render_targets() { + // This is a hack, but this function is called one time after all viewports have been updated. + // So it marks the end of the frame for all viewports + // In the OpenGL renderer we have to call end_frame for each viewport so we can swap the + // buffers for each window before proceeding to the next. + // This allows us to only increment the frame after all viewports are done. + GLES3::Utilities *utils = GLES3::Utilities::get_singleton(); + utils->capture_timestamps_end(); } -void RasterizerGLES3::_blit_render_target_to_screen(RID p_render_target, DisplayServer::WindowID p_screen, const Rect2 &p_screen_rect) { +void RasterizerGLES3::_blit_render_target_to_screen(RID p_render_target, DisplayServer::WindowID p_screen, const Rect2 &p_screen_rect, uint32_t p_layer) { GLES3::RenderTarget *rt = GLES3::TextureStorage::get_singleton()->get_render_target(p_render_target); + ERR_FAIL_COND(!rt); - glBindFramebuffer(GL_READ_FRAMEBUFFER, rt->fbo); + GLuint read_fbo = 0; + if (rt->view_count > 1) { + glGenFramebuffers(1, &read_fbo); + glBindFramebuffer(GL_READ_FRAMEBUFFER, read_fbo); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, rt->color, 0, p_layer); + } else { + glBindFramebuffer(GL_READ_FRAMEBUFFER, rt->fbo); + } + glReadBuffer(GL_COLOR_ATTACHMENT0); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); // Flip content upside down to correct for coordinates. - glBlitFramebuffer(0, 0, rt->size.x, rt->size.y, 0, p_screen_rect.size.y, p_screen_rect.size.x, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST); + Vector2i screen_rect_end = p_screen_rect.get_end(); + glBlitFramebuffer(0, 0, rt->size.x, rt->size.y, + p_screen_rect.position.x, screen_rect_end.y, screen_rect_end.x, p_screen_rect.position.y, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + + if (read_fbo != 0) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glDeleteFramebuffers(1, &read_fbo); + } } // is this p_screen useless in a multi window environment? @@ -293,7 +316,7 @@ void RasterizerGLES3::blit_render_targets_to_screen(DisplayServer::WindowID p_sc RID rid_rt = blit.render_target; Rect2 dst_rect = blit.dst_rect; - _blit_render_target_to_screen(rid_rt, p_screen, dst_rect); + _blit_render_target_to_screen(rid_rt, p_screen, dst_rect, blit.multi_view.use_layer ? blit.multi_view.layer : 0); } } diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h index 431515fd0d..d7d26685b4 100644 --- a/drivers/gles3/rasterizer_gles3.h +++ b/drivers/gles3/rasterizer_gles3.h @@ -68,7 +68,7 @@ protected: RasterizerCanvasGLES3 *canvas = nullptr; RasterizerSceneGLES3 *scene = nullptr; - void _blit_render_target_to_screen(RID p_render_target, DisplayServer::WindowID p_screen, const Rect2 &p_screen_rect); + void _blit_render_target_to_screen(RID p_render_target, DisplayServer::WindowID p_screen, const Rect2 &p_screen_rect, uint32_t p_layer); public: RendererUtilities *get_utilities() { return utilities; } diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 4c71edc24c..8250140c3f 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -35,6 +35,7 @@ #include "servers/rendering/rendering_server_globals.h" #include "storage/config.h" #include "storage/mesh_storage.h" +#include "storage/particles_storage.h" #include "storage/texture_storage.h" #ifdef GLES3_ENABLED @@ -50,6 +51,9 @@ RenderGeometryInstance *RasterizerSceneGLES3::geometry_instance_create(RID p_bas ginstance->data->base = p_base; ginstance->data->base_type = type; + ginstance->data->dependency_tracker.userdata = ginstance; + ginstance->data->dependency_tracker.changed_callback = _geometry_instance_dependency_changed; + ginstance->data->dependency_tracker.deleted_callback = _geometry_instance_dependency_deleted; ginstance->_mark_dirty(); @@ -314,6 +318,8 @@ void RasterizerSceneGLES3::_geometry_instance_add_surface(GeometryInstanceGLES3 void RasterizerSceneGLES3::_geometry_instance_update(RenderGeometryInstance *p_geometry_instance) { GLES3::MeshStorage *mesh_storage = GLES3::MeshStorage::get_singleton(); + GLES3::ParticlesStorage *particles_storage = GLES3::ParticlesStorage::get_singleton(); + GeometryInstanceGLES3 *ginstance = static_cast<GeometryInstanceGLES3 *>(p_geometry_instance); if (ginstance->data->dirty_dependencies) { @@ -361,6 +367,26 @@ void RasterizerSceneGLES3::_geometry_instance_update(RenderGeometryInstance *p_g } break; case RS::INSTANCE_PARTICLES: { + int draw_passes = particles_storage->particles_get_draw_passes(ginstance->data->base); + + for (int j = 0; j < draw_passes; j++) { + RID mesh = particles_storage->particles_get_draw_pass_mesh(ginstance->data->base, j); + if (!mesh.is_valid()) { + continue; + } + + const RID *materials = nullptr; + uint32_t surface_count; + + materials = mesh_storage->mesh_get_surface_count_and_materials(mesh, surface_count); + if (materials) { + for (uint32_t k = 0; k < surface_count; k++) { + _geometry_instance_add_surface(ginstance, k, materials[k], mesh); + } + } + } + + ginstance->instance_count = particles_storage->particles_get_amount(ginstance->data->base); } break; default: { @@ -382,10 +408,23 @@ void RasterizerSceneGLES3::_geometry_instance_update(RenderGeometryInstance *p_g ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; } - //ginstance->transforms_uniform_set = mesh_storage->multimesh_get_3d_uniform_set(ginstance->data->base, scene_globals.default_shader_rd, TRANSFORMS_UNIFORM_SET); - } else if (ginstance->data->base_type == RS::INSTANCE_PARTICLES) { + ginstance->base_flags |= INSTANCE_DATA_FLAG_PARTICLES; + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH; + + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR; + ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA; + + if (!particles_storage->particles_is_using_local_coords(ginstance->data->base)) { + store_transform = false; + } + } else if (ginstance->data->base_type == RS::INSTANCE_MESH) { + if (mesh_storage->skeleton_is_valid(ginstance->data->skeleton)) { + if (ginstance->data->dirty_dependencies) { + mesh_storage->skeleton_update_dependency(ginstance->data->skeleton, &ginstance->data->dependency_tracker); + } + } } ginstance->store_transform_cache = store_transform; @@ -595,6 +634,7 @@ void RasterizerSceneGLES3::_setup_sky(const RenderDataGLES3 *p_render_data, cons sky->reflection_dirty = true; } + glBindBufferBase(GL_UNIFORM_BUFFER, SKY_DIRECTIONAL_LIGHT_UNIFORM_LOCATION, sky_globals.directional_light_buffer); if (shader_data->uses_light) { sky_globals.directional_light_count = 0; for (int i = 0; i < (int)p_lights.size(); i++) { @@ -678,7 +718,6 @@ void RasterizerSceneGLES3::_setup_sky(const RenderDataGLES3 *p_render_data, cons } if (light_data_dirty) { - glBindBufferBase(GL_UNIFORM_BUFFER, SKY_DIRECTIONAL_LIGHT_UNIFORM_LOCATION, sky_globals.directional_light_buffer); glBufferData(GL_UNIFORM_BUFFER, sizeof(DirectionalLightData) * sky_globals.max_directional_lights, sky_globals.directional_lights, GL_STREAM_DRAW); glBindBuffer(GL_UNIFORM_BUFFER, 0); @@ -751,12 +790,16 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection, sky_transform.invert(); sky_transform = p_transform.basis * sky_transform; - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_bind_shader(shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::ORIENTATION, sky_transform, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::PROJECTION, camera.columns[2][0], camera.columns[0][0], camera.columns[2][1], camera.columns[1][1], shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::POSITION, p_transform.origin, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::TIME, time, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); + bool success = material_storage->shaders.sky_shader.version_bind_shader(shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); + if (!success) { + return; + } + + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::ORIENTATION, sky_transform, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::PROJECTION, camera.columns[2][0], camera.columns[0][0], camera.columns[2][1], camera.columns[1][1], shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::POSITION, p_transform.origin, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::TIME, time, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND); glBindVertexArray(sky_globals.screen_triangle_array); glDrawArrays(GL_TRIANGLES, 0, 3); @@ -847,15 +890,18 @@ void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_p Projection cm; cm.set_perspective(90, 1, 0.01, 10.0); Projection correction; - correction.set_depth_correction(true); + correction.columns[1][1] = -1.0; cm = correction * cm; - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_bind_shader(shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + bool success = material_storage->shaders.sky_shader.version_bind_shader(shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + if (!success) { + return; + } - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::POSITION, p_transform.origin, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::TIME, time, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::PROJECTION, cm.columns[2][0], cm.columns[0][0], cm.columns[2][1], cm.columns[1][1], shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::POSITION, p_transform.origin, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::TIME, time, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::PROJECTION, cm.columns[2][0], cm.columns[0][0], cm.columns[2][1], cm.columns[1][1], shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); glBindVertexArray(sky_globals.screen_triangle_array); @@ -864,7 +910,7 @@ void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_p for (int i = 0; i < 6; i++) { Basis local_view = Basis::looking_at(view_normals[i], view_up[i]); - GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::ORIENTATION, local_view, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::ORIENTATION, local_view, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, sky->raw_radiance, 0); glDrawArrays(GL_TRIANGLES, 0, 3); } @@ -945,7 +991,10 @@ void RasterizerSceneGLES3::_filter_sky_radiance(Sky *p_sky, int p_base_layer) { glViewport(0, 0, size, size); glBindVertexArray(sky_globals.screen_triangle_array); - material_storage->shaders.cubemap_filter_shader.version_bind_shader(scene_globals.cubemap_filter_shader_version, mode); + bool success = material_storage->shaders.cubemap_filter_shader.version_bind_shader(scene_globals.cubemap_filter_shader_version, mode); + if (!success) { + return; + } if (p_base_layer > 0) { const uint32_t sample_counts[4] = { 1, sky_globals.ggx_samples / 4, sky_globals.ggx_samples / 2, sky_globals.ggx_samples }; @@ -1166,12 +1215,13 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const // LOD if (p_render_data->screen_mesh_lod_threshold > 0.0 && mesh_storage->mesh_surface_has_lod(surf->surface)) { - //lod - Vector3 lod_support_min = inst->transformed_aabb.get_support(-p_render_data->lod_camera_plane.normal); - Vector3 lod_support_max = inst->transformed_aabb.get_support(p_render_data->lod_camera_plane.normal); + // Get the LOD support points on the mesh AABB. + Vector3 lod_support_min = inst->transformed_aabb.get_support(p_render_data->cam_transform.basis.get_column(Vector3::AXIS_Z)); + Vector3 lod_support_max = inst->transformed_aabb.get_support(-p_render_data->cam_transform.basis.get_column(Vector3::AXIS_Z)); - float distance_min = p_render_data->lod_camera_plane.distance_to(lod_support_min); - float distance_max = p_render_data->lod_camera_plane.distance_to(lod_support_max); + // Get the distances to those points on the AABB from the camera origin. + float distance_min = (float)p_render_data->cam_transform.origin.distance_to(lod_support_min); + float distance_max = (float)p_render_data->cam_transform.origin.distance_to(lod_support_max); float distance = 0.0; @@ -1188,8 +1238,8 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const distance = 1.0; } - uint32_t indices; - surf->lod_index = mesh_storage->mesh_surface_get_lod(surf->surface, inst->lod_model_scale * inst->lod_bias, distance * p_render_data->lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, &indices); + uint32_t indices = 0; + surf->lod_index = mesh_storage->mesh_surface_get_lod(surf->surface, inst->lod_model_scale * inst->lod_bias, distance * p_render_data->lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, indices); /* if (p_render_data->render_info) { indices = _indices_to_primitives(surf->primitive, indices); @@ -1264,7 +1314,7 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const // Needs to be called after _setup_lights so that directional_light_count is accurate. void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_data, bool p_no_fog, const Size2i &p_screen_size, bool p_flip_y, const Color &p_default_bg_color, bool p_pancake_shadows) { Projection correction; - correction.set_depth_correction(p_flip_y); + correction.columns[1][1] = p_flip_y ? -1.0 : 1.0; Projection projection = correction * p_render_data->cam_projection; //store camera into ubo GLES3::MaterialStorage::store_camera(projection, scene_state.ubo.projection_matrix); @@ -1272,6 +1322,19 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da GLES3::MaterialStorage::store_transform(p_render_data->cam_transform, scene_state.ubo.inv_view_matrix); GLES3::MaterialStorage::store_transform(p_render_data->inv_cam_transform, scene_state.ubo.view_matrix); + if (p_render_data->view_count > 1) { + for (uint32_t v = 0; v < p_render_data->view_count; v++) { + projection = correction * p_render_data->view_projection[v]; + GLES3::MaterialStorage::store_camera(projection, scene_state.multiview_ubo.projection_matrix_view[v]); + GLES3::MaterialStorage::store_camera(projection.inverse(), scene_state.multiview_ubo.inv_projection_matrix_view[v]); + + scene_state.multiview_ubo.eye_offset[v][0] = p_render_data->view_eye_offset[v].x; + scene_state.multiview_ubo.eye_offset[v][1] = p_render_data->view_eye_offset[v].y; + scene_state.multiview_ubo.eye_offset[v][2] = p_render_data->view_eye_offset[v].z; + scene_state.multiview_ubo.eye_offset[v][3] = 0.0; + } + } + scene_state.ubo.directional_light_count = p_render_data->directional_light_count; scene_state.ubo.z_far = p_render_data->z_far; @@ -1373,6 +1436,15 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_DATA_UNIFORM_LOCATION, scene_state.ubo_buffer); glBufferData(GL_UNIFORM_BUFFER, sizeof(SceneState::UBO), &scene_state.ubo, GL_STREAM_DRAW); glBindBuffer(GL_UNIFORM_BUFFER, 0); + + if (p_render_data->view_count > 1) { + if (scene_state.multiview_buffer == 0) { + glGenBuffers(1, &scene_state.multiview_buffer); + } + glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_MULTIVIEW_UNIFORM_LOCATION, scene_state.multiview_buffer); + glBufferData(GL_UNIFORM_BUFFER, sizeof(SceneState::MultiviewUBO), &scene_state.multiview_ubo, GL_STREAM_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + } } // Puts lights into Uniform Buffers. Needs to be called before _fill_list as this caches the index of each light in the Uniform Buffer @@ -1619,6 +1691,9 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ ERR_FAIL_COND(rb.is_null()); } + GLES3::RenderTarget *rt = texture_storage->get_render_target(rb->render_target); + ERR_FAIL_COND(!rt); + // Assign render data // Use the format from rendererRD RenderDataGLES3 render_data; @@ -1650,7 +1725,6 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ // this should be the same for all cameras.. render_data.lod_distance_multiplier = p_camera_data->main_projection.get_lod_multiplier(); - render_data.lod_camera_plane = Plane(-p_camera_data->main_transform.basis.get_column(Vector3::AXIS_Z), p_camera_data->main_transform.get_origin()); if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { render_data.screen_mesh_lod_threshold = 0.0; @@ -1707,8 +1781,20 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ scene_state.ubo.emissive_exposure_normalization = -1.0; // Use default exposure normalization. + bool flip_y = !render_data.reflection_probe.is_valid(); + + if (rt->overridden.color.is_valid()) { + // If we've overridden the render target's color texture, then don't render upside down. + // We're probably rendering directly to an XR device. + flip_y = false; + } + if (!flip_y) { + // If we're rendering right-side up, then we need to change the winding order. + glFrontFace(GL_CW); + } + _setup_lights(&render_data, false, render_data.directional_light_count, render_data.omni_light_count, render_data.spot_light_count); - _setup_environment(&render_data, render_data.reflection_probe.is_valid(), screen_size, !render_data.reflection_probe.is_valid(), clear_color, false); + _setup_environment(&render_data, render_data.reflection_probe.is_valid(), screen_size, flip_y, clear_color, false); _fill_render_list(RENDER_LIST_OPAQUE, &render_data, PASS_MODE_COLOR); render_list[RENDER_LIST_OPAQUE].sort_by_key(); @@ -1770,7 +1856,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ Projection projection = render_data.cam_projection; if (render_data.reflection_probe.is_valid()) { Projection correction; - correction.set_depth_correction(true); + correction.columns[1][1] = -1.0; projection = correction * render_data.cam_projection; } @@ -1789,7 +1875,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ } } - glBindFramebuffer(GL_FRAMEBUFFER, rb->framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, rt->fbo); glViewport(0, 0, rb->width, rb->height); // Do depth prepass if it's explicitly enabled @@ -1814,8 +1900,11 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ glColorMask(0, 0, 0, 0); glClearDepth(1.0f); glClear(GL_DEPTH_BUFFER_BIT); + uint32_t spec_constant = SceneShaderGLES3::DISABLE_FOG | SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL | + SceneShaderGLES3::DISABLE_LIGHTMAP | SceneShaderGLES3::DISABLE_LIGHT_OMNI | + SceneShaderGLES3::DISABLE_LIGHT_SPOT; - RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, 0, use_wireframe); + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, spec_constant, use_wireframe); _render_list_template<PASS_MODE_DEPTH>(&render_list_params, &render_data, 0, render_list[RENDER_LIST_OPAQUE].elements.size()); glColorMask(1, 1, 1, 1); @@ -1857,11 +1946,11 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ { // Specialization Constants that apply for entire rendering pass. if (render_data.directional_light_count == 0) { - spec_constant_base_flags |= 1 << SPEC_CONSTANT_DISABLE_DIRECTIONAL_LIGHTS; + spec_constant_base_flags |= SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL; } if (render_data.environment.is_null() || (render_data.environment.is_valid() && !environment_get_fog_enabled(render_data.environment))) { - spec_constant_base_flags |= 1 << SPEC_CONSTANT_DISABLE_FOG; + spec_constant_base_flags |= SceneShaderGLES3::DISABLE_FOG; } } // Render Opaque Objects. @@ -1895,6 +1984,11 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ _render_list_template<PASS_MODE_COLOR_TRANSPARENT>(&render_list_params_alpha, &render_data, 0, render_list[RENDER_LIST_ALPHA].elements.size(), true); + if (!flip_y) { + // Restore the default winding order. + glFrontFace(GL_CCW); + } + if (rb.is_valid()) { _render_buffers_debug_draw(rb, p_shadow_atlas, p_occluder_debug_tex); } @@ -1905,6 +1999,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ template <PassMode p_pass_mode> void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, const RenderDataGLES3 *p_render_data, uint32_t p_from_element, uint32_t p_to_element, bool p_alpha_pass) { GLES3::MeshStorage *mesh_storage = GLES3::MeshStorage::get_singleton(); + GLES3::ParticlesStorage *particles_storage = GLES3::ParticlesStorage::get_singleton(); GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); GLES3::Config *config = GLES3::Config::get_singleton(); @@ -1916,8 +2011,13 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, GLES3::SceneShaderData *prev_shader = nullptr; GeometryInstanceGLES3 *prev_inst = nullptr; SceneShaderGLES3::ShaderVariant prev_variant = SceneShaderGLES3::ShaderVariant::MODE_COLOR; + SceneShaderGLES3::ShaderVariant shader_variant = SceneShaderGLES3::MODE_COLOR; // Assigned to silence wrong -Wmaybe-initialized - SceneShaderGLES3::ShaderVariant shader_variant = SceneShaderGLES3::MODE_COLOR; // Assigned to silence wrong -Wmaybe-initialized. + uint32_t base_spec_constants = p_params->spec_constant_base_flags; + + if (p_render_data->view_count > 1) { + base_spec_constants |= SceneShaderGLES3::USE_MULTIVIEW; + } switch (p_pass_mode) { case PASS_MODE_COLOR: @@ -1939,12 +2039,14 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, Sky *sky = sky_owner.get_or_null(environment_get_sky(p_render_data->environment)); if (sky && sky->radiance != 0) { texture_to_bind = sky->radiance; - // base_spec_constant |= USE_RADIANCE_MAP; + base_spec_constants |= SceneShaderGLES3::USE_RADIANCE_MAP; } glBindTexture(GL_TEXTURE_CUBE_MAP, texture_to_bind); } } + bool should_request_redraw = false; + for (uint32_t i = p_from_element; i < p_to_element; i++) { const GeometryInstanceSurface *surf = p_params->elements[i]; GeometryInstanceGLES3 *inst = surf->owner; @@ -1957,8 +2059,6 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, continue; } - //uint32_t base_spec_constants = p_params->spec_constant_base_flags; - GLES3::SceneShaderData *shader; GLES3::SceneMaterialData *material_data; void *mesh_surface; @@ -1977,6 +2077,11 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, continue; } + //request a redraw if one of the shaders uses TIME + if (shader->uses_time) { + should_request_redraw = true; + } + if constexpr (p_pass_mode == PASS_MODE_COLOR_TRANSPARENT) { if (scene_state.current_depth_test != shader->depth_test) { if (shader->depth_test == GLES3::SceneShaderData::DEPTH_TEST_DISABLED) { @@ -2104,7 +2209,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } bool use_index_buffer = index_array_gl != 0; - if (prev_index_array_gl != index_array_gl) { + if (prev_index_array_gl != index_array_gl || prev_vertex_array_gl != vertex_array_gl) { if (index_array_gl != 0) { // Bind index each time so we can use LODs glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_array_gl); @@ -2124,11 +2229,16 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, SceneShaderGLES3::ShaderVariant instance_variant = shader_variant; if (inst->instance_count > 0) { + // Will need to use instancing to draw (either MultiMesh or Particles). instance_variant = SceneShaderGLES3::ShaderVariant(1 + int(shader_variant)); } if (prev_shader != shader || prev_variant != instance_variant) { - material_storage->shaders.scene_shader.version_bind_shader(shader->version, instance_variant); + bool success = material_storage->shaders.scene_shader.version_bind_shader(shader->version, instance_variant, base_spec_constants); + if (!success) { + continue; + } + float opaque_prepass_threshold = 0.0; if constexpr (p_pass_mode == PASS_MODE_DEPTH) { opaque_prepass_threshold = 0.99; @@ -2136,7 +2246,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, opaque_prepass_threshold = 0.1; } - material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OPAQUE_PREPASS_THRESHOLD, opaque_prepass_threshold, shader->version, instance_variant); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OPAQUE_PREPASS_THRESHOLD, opaque_prepass_threshold, shader->version, instance_variant, base_spec_constants); prev_shader = shader; prev_variant = instance_variant; @@ -2144,41 +2254,58 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, if (prev_inst != inst || prev_shader != shader || prev_variant != instance_variant) { // Rebind the light indices. - material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OMNI_LIGHT_COUNT, inst->omni_light_count, shader->version, instance_variant); - material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::SPOT_LIGHT_COUNT, inst->spot_light_count, shader->version, instance_variant); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OMNI_LIGHT_COUNT, inst->omni_light_count, shader->version, instance_variant, base_spec_constants); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::SPOT_LIGHT_COUNT, inst->spot_light_count, shader->version, instance_variant, base_spec_constants); if (inst->omni_light_count) { - glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::OMNI_LIGHT_INDICES, shader->version, instance_variant), inst->omni_light_count, inst->omni_light_gl_cache.ptr()); + glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::OMNI_LIGHT_INDICES, shader->version, instance_variant, base_spec_constants), inst->omni_light_count, inst->omni_light_gl_cache.ptr()); } if (inst->spot_light_count) { - glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::SPOT_LIGHT_INDICES, shader->version, instance_variant), inst->spot_light_count, inst->spot_light_gl_cache.ptr()); + glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::SPOT_LIGHT_INDICES, shader->version, instance_variant, base_spec_constants), inst->spot_light_count, inst->spot_light_gl_cache.ptr()); } prev_inst = inst; } - material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, world_transform, shader->version, instance_variant); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, world_transform, shader->version, instance_variant, base_spec_constants); if (inst->instance_count > 0) { - // Using MultiMesh. + // Using MultiMesh or Particles. // Bind instance buffers. - GLuint multimesh_buffer = mesh_storage->multimesh_get_gl_buffer(inst->data->base); - glBindBuffer(GL_ARRAY_BUFFER, multimesh_buffer); - uint32_t multimesh_stride = mesh_storage->multimesh_get_stride(inst->data->base); + GLuint instance_buffer = 0; + uint32_t stride = 0; + if (inst->flags_cache & INSTANCE_DATA_FLAG_PARTICLES) { + instance_buffer = particles_storage->particles_get_gl_buffer(inst->data->base); + stride = 16; // 12 bytes for instance transform and 4 bytes for packed color and custom. + } else { + instance_buffer = mesh_storage->multimesh_get_gl_buffer(inst->data->base); + stride = mesh_storage->multimesh_get_stride(inst->data->base); + } + + if (instance_buffer == 0) { + // Instance buffer not initialized yet. Skip rendering for now. + continue; + } + + glBindBuffer(GL_ARRAY_BUFFER, instance_buffer); + glEnableVertexAttribArray(12); - glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0)); + glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0)); glVertexAttribDivisor(12, 1); glEnableVertexAttribArray(13); - glVertexAttribPointer(13, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 4)); + glVertexAttribPointer(13, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4)); glVertexAttribDivisor(13, 1); - glEnableVertexAttribArray(14); - glVertexAttribPointer(14, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 8)); - glVertexAttribDivisor(14, 1); + if (!(inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D)) { + glEnableVertexAttribArray(14); + glVertexAttribPointer(14, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(sizeof(float) * 8)); + glVertexAttribDivisor(14, 1); + } - if (mesh_storage->multimesh_uses_colors(inst->data->base) || mesh_storage->multimesh_uses_custom_data(inst->data->base)) { + if ((inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR) || (inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA)) { + uint32_t color_custom_offset = inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D ? 8 : 12; glEnableVertexAttribArray(15); - glVertexAttribIPointer(15, 4, GL_UNSIGNED_INT, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(mesh_storage->multimesh_get_color_offset(inst->data->base) * sizeof(float))); + glVertexAttribIPointer(15, 4, GL_UNSIGNED_INT, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(color_custom_offset * sizeof(float))); glVertexAttribDivisor(15, 1); } if (use_index_buffer) { @@ -2201,12 +2328,83 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, glDisableVertexAttribArray(15); } } + + // Make the actual redraw request + if (should_request_redraw) { + RenderingServerDefault::redraw_request(); + } } void RasterizerSceneGLES3::render_material(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, const PagedArray<RenderGeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { } void RasterizerSceneGLES3::render_particle_collider_heightfield(RID p_collider, const Transform3D &p_transform, const PagedArray<RenderGeometryInstance *> &p_instances) { + GLES3::ParticlesStorage *particles_storage = GLES3::ParticlesStorage::get_singleton(); + + ERR_FAIL_COND(!particles_storage->particles_collision_is_heightfield(p_collider)); + Vector3 extents = particles_storage->particles_collision_get_extents(p_collider) * p_transform.basis.get_scale(); + Projection cm; + cm.set_orthogonal(-extents.x, extents.x, -extents.z, extents.z, 0, extents.y * 2.0); + + Vector3 cam_pos = p_transform.origin; + cam_pos.y += extents.y; + + Transform3D cam_xform; + cam_xform.set_look_at(cam_pos, cam_pos - p_transform.basis.get_column(Vector3::AXIS_Y), -p_transform.basis.get_column(Vector3::AXIS_Z).normalized()); + + GLuint fb = particles_storage->particles_collision_get_heightfield_framebuffer(p_collider); + Size2i fb_size = particles_storage->particles_collision_get_heightfield_size(p_collider); + + RENDER_TIMESTAMP("Setup GPUParticlesCollisionHeightField3D"); + + RenderDataGLES3 render_data; + + render_data.cam_projection = cm; + render_data.cam_transform = cam_xform; + render_data.view_projection[0] = cm; + render_data.inv_cam_transform = render_data.cam_transform.affine_inverse(); + render_data.cam_orthogonal = true; + render_data.z_near = 0.0; + render_data.z_far = cm.get_z_far(); + + render_data.instances = &p_instances; + + _setup_environment(&render_data, true, Vector2(fb_size), true, Color(), false); + + PassMode pass_mode = PASS_MODE_SHADOW; + + _fill_render_list(RENDER_LIST_SECONDARY, &render_data, pass_mode); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + + RENDER_TIMESTAMP("Render Collider Heightfield"); + + glBindFramebuffer(GL_FRAMEBUFFER, fb); + glViewport(0, 0, fb_size.width, fb_size.height); + + GLuint global_buffer = GLES3::MaterialStorage::get_singleton()->global_shader_parameters_get_uniform_buffer(); + + glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_GLOBALS_UNIFORM_LOCATION, global_buffer); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + glDisable(GL_BLEND); + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + glDisable(GL_SCISSOR_TEST); + glCullFace(GL_BACK); + glEnable(GL_CULL_FACE); + scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; + + glColorMask(0, 0, 0, 0); + glClearDepth(1.0f); + glClear(GL_DEPTH_BUFFER_BIT); + + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, 31, false); + + _render_list_template<PASS_MODE_SHADOW>(&render_list_params, &render_data, 0, render_list[RENDER_LIST_SECONDARY].elements.size()); + + glColorMask(1, 1, 1, 1); + glBindFramebuffer(GL_FRAMEBUFFER, 0); } void RasterizerSceneGLES3::set_time(double p_time, double p_step) { @@ -2357,7 +2555,7 @@ RasterizerSceneGLES3::RasterizerSceneGLES3() { global_defines += "#define MAX_GLOBAL_SHADER_UNIFORMS 256\n"; // TODO: this is arbitrary for now global_defines += "\n#define MAX_LIGHT_DATA_STRUCTS " + itos(config->max_renderable_lights) + "\n"; global_defines += "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(MAX_DIRECTIONAL_LIGHTS) + "\n"; - global_defines += "\n#define MAX_FORWARD_LIGHTS " + itos(config->max_lights_per_object) + "\n"; + global_defines += "\n#define MAX_FORWARD_LIGHTS uint(" + itos(config->max_lights_per_object) + ")\n"; material_storage->shaders.scene_shader.initialize(global_defines); scene_globals.shader_default_version = material_storage->shaders.scene_shader.version_create(); material_storage->shaders.scene_shader.version_bind_shader(scene_globals.shader_default_version, SceneShaderGLES3::MODE_COLOR); @@ -2397,7 +2595,6 @@ void fragment() { global_defines += "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(sky_globals.max_directional_lights) + "\n"; material_storage->shaders.sky_shader.initialize(global_defines); sky_globals.shader_default_version = material_storage->shaders.sky_shader.version_create(); - material_storage->shaders.sky_shader.version_bind_shader(sky_globals.shader_default_version, SkyShaderGLES3::MODE_BACKGROUND); } { @@ -2405,7 +2602,6 @@ void fragment() { global_defines += "\n#define MAX_SAMPLE_COUNT " + itos(sky_globals.ggx_samples) + "\n"; material_storage->shaders.cubemap_filter_shader.initialize(global_defines); scene_globals.cubemap_filter_shader_version = material_storage->shaders.cubemap_filter_shader.version_create(); - material_storage->shaders.cubemap_filter_shader.version_bind_shader(scene_globals.cubemap_filter_shader_version, CubemapFilterShaderGLES3::MODE_DEFAULT); } { diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index d11dc14080..6e1f1babf8 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -74,6 +74,7 @@ enum SceneUniformLocation { SCENE_OMNILIGHT_UNIFORM_LOCATION, SCENE_SPOTLIGHT_UNIFORM_LOCATION, SCENE_DIRECTIONAL_LIGHT_UNIFORM_LOCATION, + SCENE_MULTIVIEW_UNIFORM_LOCATION, }; enum SkyUniformLocation { @@ -84,21 +85,13 @@ enum SkyUniformLocation { SKY_DIRECTIONAL_LIGHT_UNIFORM_LOCATION, }; -enum { - SPEC_CONSTANT_DISABLE_LIGHTMAP = 0, - SPEC_CONSTANT_DISABLE_DIRECTIONAL_LIGHTS = 1, - SPEC_CONSTANT_DISABLE_OMNI_LIGHTS = 2, - SPEC_CONSTANT_DISABLE_SPOT_LIGHTS = 3, - SPEC_CONSTANT_DISABLE_FOG = 4, -}; - struct RenderDataGLES3 { Ref<RenderSceneBuffersGLES3> render_buffers; bool transparent_bg = false; - Transform3D cam_transform = Transform3D(); - Transform3D inv_cam_transform = Transform3D(); - Projection cam_projection = Projection(); + Transform3D cam_transform; + Transform3D inv_cam_transform; + Projection cam_projection; bool cam_orthogonal = false; // For stereo rendering @@ -112,13 +105,12 @@ struct RenderDataGLES3 { const PagedArray<RenderGeometryInstance *> *instances = nullptr; const PagedArray<RID> *lights = nullptr; const PagedArray<RID> *reflection_probes = nullptr; - RID environment = RID(); - RID camera_attributes = RID(); - RID reflection_probe = RID(); + RID environment; + RID camera_attributes; + RID reflection_probe; int reflection_probe_pass = 0; float lod_distance_multiplier = 0.0; - Plane lod_camera_plane = Plane(); float screen_mesh_lod_threshold = 0.0; uint32_t directional_light_count = 0; @@ -344,6 +336,13 @@ private: }; static_assert(sizeof(UBO) % 16 == 0, "Scene UBO size must be a multiple of 16 bytes"); + struct MultiviewUBO { + float projection_matrix_view[RendererSceneRender::MAX_RENDER_VIEWS][16]; + float inv_projection_matrix_view[RendererSceneRender::MAX_RENDER_VIEWS][16]; + float eye_offset[RendererSceneRender::MAX_RENDER_VIEWS][4]; + }; + static_assert(sizeof(MultiviewUBO) % 16 == 0, "Multiview UBO size must be a multiple of 16 bytes"); + struct TonemapUBO { float exposure = 1.0; float white = 1.0; @@ -354,6 +353,8 @@ private: UBO ubo; GLuint ubo_buffer = 0; + MultiviewUBO multiview_ubo; + GLuint multiview_buffer = 0; GLuint tonemap_buffer = 0; bool used_depth_prepass = false; diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 033f10dbc5..1dcd17ea0e 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -92,7 +92,7 @@ void ShaderGLES3::_add_stage(const char *p_code, StageType p_stage_type) { } } -void ShaderGLES3::_setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name, int p_uniform_count, const char **p_uniform_names, int p_ubo_count, const UBOPair *p_ubos, int p_texture_count, const TexUnitPair *p_tex_units, int p_specialization_count, const Specialization *p_specializations, int p_variant_count, const char **p_variants) { +void ShaderGLES3::_setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name, int p_uniform_count, const char **p_uniform_names, int p_ubo_count, const UBOPair *p_ubos, int p_feedback_count, const Feedback *p_feedback, int p_texture_count, const TexUnitPair *p_tex_units, int p_specialization_count, const Specialization *p_specializations, int p_variant_count, const char **p_variants) { name = p_name; if (p_vertex_code) { @@ -118,6 +118,8 @@ void ShaderGLES3::_setup(const char *p_vertex_code, const char *p_fragment_code, } variant_defines = p_variants; variant_count = p_variant_count; + feedbacks = p_feedback; + feedback_count = p_feedback_count; StringBuilder tohash; /* @@ -142,7 +144,7 @@ RID ShaderGLES3::version_create() { return version_owner.make_rid(version); } -void ShaderGLES3::_build_variant_code(StringBuilder &builder, uint32_t p_variant, const Version *p_version, const StageTemplate &p_template, uint64_t p_specialization) { +void ShaderGLES3::_build_variant_code(StringBuilder &builder, uint32_t p_variant, const Version *p_version, StageType p_stage_type, uint64_t p_specialization) { #ifdef GLES_OVER_GL builder.append("#version 330\n"); builder.append("#define USE_GLES_OVER_GL\n"); @@ -171,6 +173,24 @@ void ShaderGLES3::_build_variant_code(StringBuilder &builder, uint32_t p_variant } builder.append("\n"); //make sure defines begin at newline + // Insert multiview extension loading, because it needs to appear before + // any non-preprocessor code (like the "precision highp..." lines below). + builder.append("#ifdef USE_MULTIVIEW\n"); + builder.append("#if defined(GL_OVR_multiview2)\n"); + builder.append("#extension GL_OVR_multiview2 : require\n"); + builder.append("#elif defined(GL_OVR_multiview)\n"); + builder.append("#extension GL_OVR_multiview : require\n"); + builder.append("#endif\n"); + if (p_stage_type == StageType::STAGE_TYPE_VERTEX) { + builder.append("layout(num_views=2) in;\n"); + } + builder.append("#define ViewIndex gl_ViewID_OVR\n"); + builder.append("#define MAX_VIEWS 2\n"); + builder.append("#else\n"); + builder.append("#define ViewIndex 0\n"); + builder.append("#define MAX_VIEWS 1\n"); + builder.append("#endif\n"); + // Default to highp precision unless specified otherwise. builder.append("precision highp float;\n"); builder.append("precision highp int;\n"); @@ -180,8 +200,9 @@ void ShaderGLES3::_build_variant_code(StringBuilder &builder, uint32_t p_variant builder.append("precision highp sampler2DArray;\n"); #endif - for (uint32_t i = 0; i < p_template.chunks.size(); i++) { - const StageTemplate::Chunk &chunk = p_template.chunks[i]; + const StageTemplate &stage_template = stage_templates[p_stage_type]; + for (uint32_t i = 0; i < stage_template.chunks.size(); i++) { + const StageTemplate::Chunk &chunk = stage_template.chunks[i]; switch (chunk.type) { case StageTemplate::Chunk::TYPE_MATERIAL_UNIFORMS: { builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment) @@ -224,7 +245,7 @@ void ShaderGLES3::_compile_specialization(Version::Specialization &spec, uint32_ //vertex stage { StringBuilder builder; - _build_variant_code(builder, p_variant, p_version, stage_templates[STAGE_TYPE_VERTEX], p_specialization); + _build_variant_code(builder, p_variant, p_version, STAGE_TYPE_VERTEX, p_specialization); spec.vert_id = glCreateShader(GL_VERTEX_SHADER); String builder_string = builder.as_string(); @@ -272,7 +293,7 @@ void ShaderGLES3::_compile_specialization(Version::Specialization &spec, uint32_ //fragment stage { StringBuilder builder; - _build_variant_code(builder, p_variant, p_version, stage_templates[STAGE_TYPE_FRAGMENT], p_specialization); + _build_variant_code(builder, p_variant, p_version, STAGE_TYPE_FRAGMENT, p_specialization); spec.frag_id = glCreateShader(GL_FRAGMENT_SHADER); String builder_string = builder.as_string(); @@ -320,9 +341,21 @@ void ShaderGLES3::_compile_specialization(Version::Specialization &spec, uint32_ glAttachShader(spec.id, spec.frag_id); glAttachShader(spec.id, spec.vert_id); - //for (int i = 0; i < attribute_pair_count; i++) { - // glBindAttribLocation(v.id, attribute_pairs[i].index, attribute_pairs[i].name); - //} + // If feedback exists, set it up. + + if (feedback_count) { + Vector<const char *> feedback; + for (int i = 0; i < feedback_count; i++) { + if (feedbacks[i].specialization == 0 || (feedbacks[i].specialization & p_specialization)) { + // Specialization for this feedback is enabled + feedback.push_back(feedbacks[i].name); + } + } + + if (feedback.size()) { + glTransformFeedbackVaryings(spec.id, feedback.size(), feedback.ptr(), GL_INTERLEAVED_ATTRIBS); + } + } glLinkProgram(spec.id); @@ -413,7 +446,7 @@ RS::ShaderNativeSourceCode ShaderGLES3::version_get_native_source_code(RID p_ver { StringBuilder builder; - _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_VERTEX], specialization_default_mask); + _build_variant_code(builder, i, version, STAGE_TYPE_VERTEX, specialization_default_mask); RS::ShaderNativeSourceCode::Version::Stage stage; stage.name = "vertex"; @@ -425,7 +458,7 @@ RS::ShaderNativeSourceCode ShaderGLES3::version_get_native_source_code(RID p_ver //fragment stage { StringBuilder builder; - _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_FRAGMENT], specialization_default_mask); + _build_variant_code(builder, i, version, STAGE_TYPE_FRAGMENT, specialization_default_mask); RS::ShaderNativeSourceCode::Version::Stage stage; stage.name = "fragment"; diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index 2b72549b5b..83b950ef45 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -70,15 +70,20 @@ protected: bool default_value = false; }; + struct Feedback { + const char *name; + uint64_t specialization; + }; + private: //versions CharString general_defines; // A version is a high-level construct which is a combination of built-in and user-defined shader code, Each user-created Shader makes one version - // Variants use #ifdefs to toggle behaviour on and off to change behaviour of the shader + // Variants use #ifdefs to toggle behavior on and off to change behavior of the shader // All variants are compiled each time a new version is created - // Specializations use #ifdefs to toggle behaviour on and off for performance, on supporting hardware, they will compile a version with everything enabled, and then compile more copies to improve performance - // Use specializations to enable and disabled advanced features, use variants to toggle behaviour when different data may be used (e.g. using a samplerArray vs a sampler, or doing a depth prepass vs a color pass) + // Specializations use #ifdefs to toggle behavior on and off for performance, on supporting hardware, they will compile a version with everything enabled, and then compile more copies to improve performance + // Use specializations to enable and disabled advanced features, use variants to toggle behavior when different data may be used (e.g. using a samplerArray vs a sampler, or doing a depth prepass vs a color pass) struct Version { Vector<StringName> texture_uniforms; CharString uniforms; @@ -153,7 +158,7 @@ private: StageTemplate stage_templates[STAGE_TYPE_MAX]; - void _build_variant_code(StringBuilder &p_builder, uint32_t p_variant, const Version *p_version, const StageTemplate &p_template, uint64_t p_specialization); + void _build_variant_code(StringBuilder &p_builder, uint32_t p_variant, const Version *p_version, StageType p_stage_type, uint64_t p_specialization); void _add_stage(const char *p_code, StageType p_stage_type); @@ -165,6 +170,8 @@ private: int uniform_count = 0; const UBOPair *ubo_pairs = nullptr; int ubo_count = 0; + const Feedback *feedbacks; + int feedback_count = 0; const TexUnitPair *texunit_pairs = nullptr; int texunit_pair_count = 0; int specialization_count = 0; @@ -178,13 +185,13 @@ private: protected: ShaderGLES3(); - void _setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name, int p_uniform_count, const char **p_uniform_names, int p_ubo_count, const UBOPair *p_ubos, int p_texture_count, const TexUnitPair *p_tex_units, int p_specialization_count, const Specialization *p_specializations, int p_variant_count, const char **p_variants); + void _setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name, int p_uniform_count, const char **p_uniform_names, int p_ubo_count, const UBOPair *p_ubos, int p_feedback_count, const Feedback *p_feedback, int p_texture_count, const TexUnitPair *p_tex_units, int p_specialization_count, const Specialization *p_specializations, int p_variant_count, const char **p_variants); - _FORCE_INLINE_ void _version_bind_shader(RID p_version, int p_variant, uint64_t p_specialization) { - ERR_FAIL_INDEX(p_variant, variant_count); + _FORCE_INLINE_ bool _version_bind_shader(RID p_version, int p_variant, uint64_t p_specialization) { + ERR_FAIL_INDEX_V(p_variant, variant_count, false); Version *version = version_owner.get_or_null(p_version); - ERR_FAIL_COND(!version); + ERR_FAIL_COND_V(!version, false); if (version->variants.size() == 0) { _initialize_version(version); //may lack initialization @@ -208,11 +215,14 @@ protected: spec = version->variants[p_variant].lookup_ptr(specialization_default_mask); } - ERR_FAIL_COND(!spec); // Should never happen - ERR_FAIL_COND(!spec->ok); // Should never happen + if (!spec || !spec->ok) { + WARN_PRINT_ONCE("shader failed to compile, unable to bind shader."); + return false; + } glUseProgram(spec->id); current_shader = spec; + return true; } _FORCE_INLINE_ int _version_get_uniform(int p_which, RID p_version, int p_variant, uint64_t p_specialization) { diff --git a/drivers/gles3/shaders/SCsub b/drivers/gles3/shaders/SCsub index 83ffe8b1e1..34713e7e29 100644 --- a/drivers/gles3/shaders/SCsub +++ b/drivers/gles3/shaders/SCsub @@ -17,3 +17,8 @@ if "GLES3_GLSL" in env["BUILDERS"]: env.GLES3_GLSL("scene.glsl") env.GLES3_GLSL("sky.glsl") env.GLES3_GLSL("cubemap_filter.glsl") + env.GLES3_GLSL("canvas_occlusion.glsl") + env.GLES3_GLSL("canvas_sdf.glsl") + env.GLES3_GLSL("particles.glsl") + env.GLES3_GLSL("particles_copy.glsl") + env.GLES3_GLSL("skeleton.glsl") diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index a177112476..60139de472 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -10,6 +10,7 @@ mode_instanced = #define USE_ATTRIBUTES \n#define USE_INSTANCING #[specializations] DISABLE_LIGHTING = false +USE_RGBA_SHADOWS = false #[vertex] @@ -18,9 +19,6 @@ layout(location = 0) in vec2 vertex_attrib; layout(location = 3) in vec4 color_attrib; layout(location = 4) in vec2 uv_attrib; -layout(location = 10) in uvec4 bone_attrib; -layout(location = 11) in vec4 weight_attrib; - #ifdef USE_INSTANCING layout(location = 1) in highp vec4 instance_xform0; @@ -80,8 +78,6 @@ void main() { uv = draw_data[draw_data_instance].uv_c; color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_c_rg), unpackHalf2x16(draw_data[draw_data_instance].color_c_ba)); } - uvec4 bones = uvec4(0, 0, 0, 0); - vec4 bone_weights = vec4(0.0); #elif defined(USE_ATTRIBUTES) draw_data_instance = gl_InstanceID; @@ -92,9 +88,6 @@ void main() { vec4 color = color_attrib * draw_data[draw_data_instance].modulation; vec2 uv = uv_attrib; - uvec4 bones = bone_attrib; - vec4 bone_weights = weight_attrib; - #ifdef USE_INSTANCING vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y)); color *= instance_color; @@ -109,7 +102,6 @@ void main() { vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy); vec4 color = draw_data[draw_data_instance].modulation; vec2 vertex = draw_data[draw_data_instance].dst_rect.xy + abs(draw_data[draw_data_instance].dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data[draw_data_instance].src_rect.zw, vec2(0.0, 0.0))); - uvec4 bones = uvec4(0, 0, 0, 0); #endif @@ -152,48 +144,6 @@ void main() { uv += 1e-5; } -#ifdef USE_ATTRIBUTES -#if 0 - if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_SKELETON) && bone_weights != vec4(0.0)) { //must be a valid bone - //skeleton transform - ivec4 bone_indicesi = ivec4(bone_indices); - - uvec2 tex_ofs = bone_indicesi.x * 2; - - mat2x4 m; - m = mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.x; - - tex_ofs = bone_indicesi.y * 2; - - m += mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.y; - - tex_ofs = bone_indicesi.z * 2; - - m += mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.z; - - tex_ofs = bone_indicesi.w * 2; - - m += mat2x4( - texelFetch(skeleton_buffer, tex_ofs + 0), - texelFetch(skeleton_buffer, tex_ofs + 1)) * - bone_weights.w; - - mat4 bone_matrix = skeleton_data.skeleton_transform * transpose(mat4(m[0], m[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))) * skeleton_data.skeleton_transform_inverse; - - //outvec = bone_matrix * outvec; - } -#endif -#endif - vertex = (canvas_transform * vec4(vertex, 0.0, 1.0)).xy; vertex_interp = vertex; @@ -211,8 +161,10 @@ void main() { #include "canvas_uniforms_inc.glsl" #include "stdlib_inc.glsl" -//uniform sampler2D atlas_texture; //texunit:-2 -//uniform sampler2D shadow_atlas_texture; //texunit:-3 +#ifndef DISABLE_LIGHTING +uniform sampler2D atlas_texture; //texunit:-2 +uniform sampler2D shadow_atlas_texture; //texunit:-3 +#endif // DISABLE_LIGHTING uniform sampler2D screen_texture; //texunit:-4 uniform sampler2D sdf_texture; //texunit:-5 uniform sampler2D normal_texture; //texunit:-6 @@ -244,6 +196,170 @@ layout(std140) uniform MaterialUniforms{ #GLOBALS +float vec4_to_float(vec4 p_vec) { + return dot(p_vec, vec4(1.0 / (255.0 * 255.0 * 255.0), 1.0 / (255.0 * 255.0), 1.0 / 255.0, 1.0)) * 2.0 - 1.0; +} + +vec2 screen_uv_to_sdf(vec2 p_uv) { + return screen_to_sdf * p_uv; +} + +float texture_sdf(vec2 p_sdf) { + vec2 uv = p_sdf * sdf_to_tex.xy + sdf_to_tex.zw; + float d = vec4_to_float(texture(sdf_texture, uv)); + d *= SDF_MAX_LENGTH; + return d * tex_to_sdf; +} + +vec2 texture_sdf_normal(vec2 p_sdf) { + vec2 uv = p_sdf * sdf_to_tex.xy + sdf_to_tex.zw; + + const float EPSILON = 0.001; + return normalize(vec2( + vec4_to_float(texture(sdf_texture, uv + vec2(EPSILON, 0.0))) - vec4_to_float(texture(sdf_texture, uv - vec2(EPSILON, 0.0))), + vec4_to_float(texture(sdf_texture, uv + vec2(0.0, EPSILON))) - vec4_to_float(texture(sdf_texture, uv - vec2(0.0, EPSILON))))); +} + +vec2 sdf_to_screen_uv(vec2 p_sdf) { + return p_sdf * sdf_to_screen; +} + +#ifndef DISABLE_LIGHTING +#ifdef LIGHT_CODE_USED + +vec4 light_compute( + vec3 light_vertex, + vec3 light_position, + vec3 normal, + vec4 light_color, + float light_energy, + vec4 specular_shininess, + inout vec4 shadow_modulate, + vec2 screen_uv, + vec2 uv, + vec4 color, bool is_directional) { + vec4 light = vec4(0.0); + vec3 light_direction = vec3(0.0); + + if (is_directional) { + light_direction = normalize(mix(vec3(light_position.xy, 0.0), vec3(0, 0, 1), light_position.z)); + light_position = vec3(0.0); + } else { + light_direction = normalize(light_position - light_vertex); + } + +#CODE : LIGHT + + return light; +} + +#endif + +vec3 light_normal_compute(vec3 light_vec, vec3 normal, vec3 base_color, vec3 light_color, vec4 specular_shininess, bool specular_shininess_used) { + float cNdotL = max(0.0, dot(normal, light_vec)); + + if (specular_shininess_used) { + //blinn + vec3 view = vec3(0.0, 0.0, 1.0); // not great but good enough + vec3 half_vec = normalize(view + light_vec); + + float cNdotV = max(dot(normal, view), 0.0); + float cNdotH = max(dot(normal, half_vec), 0.0); + float cVdotH = max(dot(view, half_vec), 0.0); + float cLdotH = max(dot(light_vec, half_vec), 0.0); + float shininess = exp2(15.0 * specular_shininess.a + 1.0) * 0.25; + float blinn = pow(cNdotH, shininess); + blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); + float s = (blinn) / max(4.0 * cNdotV * cNdotL, 0.75); + + return specular_shininess.rgb * light_color * s + light_color * base_color * cNdotL; + } else { + return light_color * base_color * cNdotL; + } +} + +#ifdef USE_RGBA_SHADOWS + +#define SHADOW_DEPTH(m_uv) (dot(textureLod(shadow_atlas_texture, (m_uv), 0.0), vec4(1.0 / (255.0 * 255.0 * 255.0), 1.0 / (255.0 * 255.0), 1.0 / 255.0, 1.0)) * 2.0 - 1.0) + +#else + +#define SHADOW_DEPTH(m_uv) (textureLod(shadow_atlas_texture, (m_uv), 0.0).r) + +#endif + +#define SHADOW_TEST(m_uv) \ + { \ + highp float sd = SHADOW_DEPTH(m_uv); \ + shadow += step(sd, shadow_uv.z / shadow_uv.w); \ + } + +//float distance = length(shadow_pos); +vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv +#ifdef LIGHT_CODE_USED + , + vec3 shadow_modulate +#endif +) { + float shadow = 0.0; + uint shadow_mode = light_array[light_base].flags & LIGHT_FLAGS_FILTER_MASK; + + if (shadow_mode == LIGHT_FLAGS_SHADOW_NEAREST) { + SHADOW_TEST(shadow_uv.xy); + } else if (shadow_mode == LIGHT_FLAGS_SHADOW_PCF5) { + vec2 shadow_pixel_size = vec2(light_array[light_base].shadow_pixel_size, 0.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size * 2.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size); + SHADOW_TEST(shadow_uv.xy); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size * 2.0); + shadow /= 5.0; + } else { //PCF13 + vec2 shadow_pixel_size = vec2(light_array[light_base].shadow_pixel_size, 0.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size * 6.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size * 5.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size * 4.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size * 3.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size * 2.0); + SHADOW_TEST(shadow_uv.xy - shadow_pixel_size); + SHADOW_TEST(shadow_uv.xy); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size * 2.0); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size * 3.0); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size * 4.0); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size * 5.0); + SHADOW_TEST(shadow_uv.xy + shadow_pixel_size * 6.0); + shadow /= 13.0; + } + + vec4 shadow_color = unpackUnorm4x8(light_array[light_base].shadow_color); +#ifdef LIGHT_CODE_USED + shadow_color.rgb *= shadow_modulate; +#endif + + shadow_color.a *= light_color.a; //respect light alpha + + return mix(light_color, shadow_color, shadow); +} + +void light_blend_compute(uint light_base, vec4 light_color, inout vec3 color) { + uint blend_mode = light_array[light_base].flags & LIGHT_FLAGS_BLEND_MASK; + + switch (blend_mode) { + case LIGHT_FLAGS_BLEND_MODE_ADD: { + color.rgb += light_color.rgb * light_color.a; + } break; + case LIGHT_FLAGS_BLEND_MODE_SUB: { + color.rgb -= light_color.rgb * light_color.a; + } break; + case LIGHT_FLAGS_BLEND_MODE_MIX: { + color.rgb = mix(color.rgb, light_color.rgb, light_color.a); + } break; + } +} + +#endif + #ifdef USE_NINEPATCH float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, float margin_begin, float margin_end, int np_repeat, inout int draw_center) { @@ -353,7 +469,8 @@ void main() { color *= texture(color_texture, uv); } - bool using_light = false; + uint light_count = (draw_data[draw_data_instance].flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights + bool using_light = light_count > 0u || directional_light_count > 0u; vec3 normal; @@ -414,11 +531,158 @@ void main() { #endif } + if (normal_used) { + //convert by item transform + normal.xy = mat2(normalize(draw_data[draw_data_instance].world_x), normalize(draw_data[draw_data_instance].world_y)) * normal.xy; + //convert by canvas transform + normal = normalize((canvas_normal_transform * vec4(normal, 0.0)).xyz); + } + + vec4 base_color = color; + #ifdef MODE_LIGHT_ONLY color = vec4(0.0); #else color *= canvas_modulation; #endif +#if !defined(DISABLE_LIGHTING) && !defined(MODE_UNSHADED) + + // Directional Lights + + for (uint i = 0u; i < directional_light_count; i++) { + uint light_base = i; + + vec2 direction = light_array[light_base].position; + vec4 light_color = light_array[light_base].color; + +#ifdef LIGHT_CODE_USED + + vec4 shadow_modulate = vec4(1.0); + light_color = light_compute(light_vertex, vec3(direction, light_array[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true); +#else + + if (normal_used) { + vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array[light_base].height)); + light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used); + } else { + light_color.rgb *= base_color.rgb; + } +#endif + + if (bool(light_array[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { + vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array[light_base].shadow_matrix[0], light_array[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + + vec4 shadow_uv = vec4(shadow_pos.x, light_array[light_base].shadow_y_ofs, shadow_pos.y * light_array[light_base].shadow_zfar_inv, 1.0); + + light_color = light_shadow_compute(light_base, light_color, shadow_uv +#ifdef LIGHT_CODE_USED + , + shadow_modulate.rgb +#endif + ); + } + + light_blend_compute(light_base, light_color, color.rgb); + } + + // Positional Lights + + for (uint i = 0u; i < MAX_LIGHTS_PER_ITEM; i++) { + if (i >= light_count) { + break; + } + uint light_base; + if (i < 8u) { + if (i < 4u) { + light_base = draw_data[draw_data_instance].lights[0]; + } else { + light_base = draw_data[draw_data_instance].lights[1]; + } + } else { + if (i < 12u) { + light_base = draw_data[draw_data_instance].lights[2]; + } else { + light_base = draw_data[draw_data_instance].lights[3]; + } + } + light_base >>= (i & 3u) * 8u; + light_base &= uint(0xFF); + + vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array[light_base].texture_matrix[0], light_array[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + vec2 tex_uv_atlas = tex_uv * light_array[light_base].atlas_rect.zw + light_array[light_base].atlas_rect.xy; + vec4 light_color = textureLod(atlas_texture, tex_uv_atlas, 0.0); + vec4 light_base_color = light_array[light_base].color; + +#ifdef LIGHT_CODE_USED + + vec4 shadow_modulate = vec4(1.0); + vec3 light_position = vec3(light_array[light_base].position, light_array[light_base].height); + + light_color.rgb *= light_base_color.rgb; + light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false); +#else + + light_color.rgb *= light_base_color.rgb * light_base_color.a; + + if (normal_used) { + vec3 light_pos = vec3(light_array[light_base].position, light_array[light_base].height); + vec3 pos = light_vertex; + vec3 light_vec = normalize(light_pos - pos); + + light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used); + } else { + light_color.rgb *= base_color.rgb; + } +#endif + if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) { + //if outside the light texture, light color is zero + light_color.a = 0.0; + } + + if (bool(light_array[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { + vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array[light_base].shadow_matrix[0], light_array[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + + vec2 pos_norm = normalize(shadow_pos); + vec2 pos_abs = abs(pos_norm); + vec2 pos_box = pos_norm / max(pos_abs.x, pos_abs.y); + vec2 pos_rot = pos_norm * mat2(vec2(0.7071067811865476, -0.7071067811865476), vec2(0.7071067811865476, 0.7071067811865476)); //is there a faster way to 45 degrees rot? + float tex_ofs; + float dist; + if (pos_rot.y > 0.0) { + if (pos_rot.x > 0.0) { + tex_ofs = pos_box.y * 0.125 + 0.125; + dist = shadow_pos.x; + } else { + tex_ofs = pos_box.x * -0.125 + (0.25 + 0.125); + dist = shadow_pos.y; + } + } else { + if (pos_rot.x < 0.0) { + tex_ofs = pos_box.y * -0.125 + (0.5 + 0.125); + dist = -shadow_pos.x; + } else { + tex_ofs = pos_box.x * 0.125 + (0.75 + 0.125); + dist = -shadow_pos.y; + } + } + + dist *= light_array[light_base].shadow_zfar_inv; + + //float distance = length(shadow_pos); + vec4 shadow_uv = vec4(tex_ofs, light_array[light_base].shadow_y_ofs, dist, 1.0); + + light_color = light_shadow_compute(light_base, light_color, shadow_uv +#ifdef LIGHT_CODE_USED + , + shadow_modulate.rgb +#endif + ); + } + + light_blend_compute(light_base, light_color, color.rgb); + } +#endif + frag_color = color; } diff --git a/drivers/gles3/shaders/canvas_occlusion.glsl b/drivers/gles3/shaders/canvas_occlusion.glsl new file mode 100644 index 0000000000..512800839a --- /dev/null +++ b/drivers/gles3/shaders/canvas_occlusion.glsl @@ -0,0 +1,68 @@ +/* clang-format off */ +#[modes] + +mode_sdf = +mode_shadow = #define MODE_SHADOW +mode_shadow_RGBA = #define MODE_SHADOW \n#define USE_RGBA_SHADOWS + +#[specializations] + +#[vertex] + +layout(location = 0) in vec3 vertex; + +uniform highp mat4 projection; +uniform highp vec4 modelview1; +uniform highp vec4 modelview2; +uniform highp vec2 direction; +uniform highp float z_far; + +#ifdef MODE_SHADOW +out float depth; +#endif + +void main() { + highp vec4 vtx = vec4(vertex, 1.0) * mat4(modelview1, modelview2, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + +#ifdef MODE_SHADOW + depth = dot(direction, vtx.xy); +#endif + gl_Position = projection * vtx; +} + +#[fragment] + + +uniform highp mat4 projection; +uniform highp vec4 modelview1; +uniform highp vec4 modelview2; +uniform highp vec2 direction; +uniform highp float z_far; + +#ifdef MODE_SHADOW +in highp float depth; +#endif + +#ifdef USE_RGBA_SHADOWS +layout(location = 0) out lowp vec4 out_buf; +#else +layout(location = 0) out highp float out_buf; +#endif + +void main() { + float out_depth = 1.0; + +#ifdef MODE_SHADOW + out_depth = depth / z_far; +#endif + +#ifdef USE_RGBA_SHADOWS + out_depth = clamp(out_depth, -1.0, 1.0); + out_depth = out_depth * 0.5 + 0.5; + highp vec4 comp = fract(out_depth * vec4(255.0 * 255.0 * 255.0, 255.0 * 255.0, 255.0, 1.0)); + comp -= comp.xxyz * vec4(0.0, 1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0); + out_buf = comp; +#else + out_buf = out_depth; +#endif +} diff --git a/drivers/gles3/shaders/canvas_sdf.glsl b/drivers/gles3/shaders/canvas_sdf.glsl new file mode 100644 index 0000000000..424ec22457 --- /dev/null +++ b/drivers/gles3/shaders/canvas_sdf.glsl @@ -0,0 +1,205 @@ +/* clang-format off */ +#[modes] + +mode_load = #define MODE_LOAD +mode_load_shrink = #define MODE_LOAD_SHRINK +mode_process = #define MODE_PROCESS +mode_store = #define MODE_STORE +mode_store_shrink = #define MODE_STORE_SHRINK + +#[specializations] + +#[vertex] + +layout(location = 0) in vec2 vertex_attrib; + +/* clang-format on */ + +uniform ivec2 size; +uniform int stride; +uniform int shift; +uniform ivec2 base_size; + +void main() { + gl_Position = vec4(vertex_attrib, 1.0, 1.0); +} + +/* clang-format off */ +#[fragment] + +#define SDF_MAX_LENGTH 16384.0 + +#if defined(MODE_LOAD) || defined(MODE_LOAD_SHRINK) +uniform lowp sampler2D src_pixels;//texunit:0 +#else +uniform highp isampler2D src_process;//texunit:0 +#endif + +uniform ivec2 size; +uniform int stride; +uniform int shift; +uniform ivec2 base_size; + +#if defined(MODE_LOAD) || defined(MODE_LOAD_SHRINK) || defined(MODE_PROCESS) +layout(location = 0) out ivec4 distance_field; +#else +layout(location = 0) out vec4 distance_field; +#endif + +vec4 float_to_vec4(float p_float) { + highp vec4 comp = fract(p_float * vec4(255.0 * 255.0 * 255.0, 255.0 * 255.0, 255.0, 1.0)); + comp -= comp.xxyz * vec4(0.0, 1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0); + return comp; +} + +void main() { + ivec2 pos = ivec2(gl_FragCoord.xy); + +#ifdef MODE_LOAD + + bool solid = texelFetch(src_pixels, pos, 0).r > 0.5; + distance_field = solid ? ivec4(ivec2(-32767), 0, 0) : ivec4(ivec2(32767), 0, 0); +#endif + +#ifdef MODE_LOAD_SHRINK + + int s = 1 << shift; + ivec2 base = pos << shift; + ivec2 center = base + ivec2(shift); + + ivec2 rel = ivec2(32767); + float d = 1e20; + int found = 0; + int solid_found = 0; + for (int i = 0; i < s; i++) { + for (int j = 0; j < s; j++) { + ivec2 src_pos = base + ivec2(i, j); + if (any(greaterThanEqual(src_pos, base_size))) { + continue; + } + bool solid = texelFetch(src_pixels, src_pos, 0).r > 0.5; + if (solid) { + float dist = length(vec2(src_pos - center)); + if (dist < d) { + d = dist; + rel = src_pos; + } + solid_found++; + } + found++; + } + } + + if (solid_found == found) { + //mark solid only if all are solid + rel = ivec2(-32767); + } + + distance_field = ivec4(rel, 0, 0); +#endif + +#ifdef MODE_PROCESS + + ivec2 base = pos << shift; + ivec2 center = base + ivec2(shift); + + ivec2 rel = texelFetch(src_process, pos, 0).xy; + + bool solid = rel.x < 0; + + if (solid) { + rel = -rel - ivec2(1); + } + + if (center != rel) { + //only process if it does not point to itself + const int ofs_table_size = 8; + const ivec2 ofs_table[ofs_table_size] = ivec2[]( + ivec2(-1, -1), + ivec2(0, -1), + ivec2(+1, -1), + + ivec2(-1, 0), + ivec2(+1, 0), + + ivec2(-1, +1), + ivec2(0, +1), + ivec2(+1, +1)); + + float dist = length(vec2(rel - center)); + for (int i = 0; i < ofs_table_size; i++) { + ivec2 src_pos = pos + ofs_table[i] * stride; + if (any(lessThan(src_pos, ivec2(0))) || any(greaterThanEqual(src_pos, size))) { + continue; + } + ivec2 src_rel = texelFetch(src_process, src_pos, 0).xy; + bool src_solid = src_rel.x < 0; + if (src_solid) { + src_rel = -src_rel - ivec2(1); + } + + if (src_solid != solid) { + src_rel = ivec2(src_pos << shift); //point to itself if of different type + } + + float src_dist = length(vec2(src_rel - center)); + if (src_dist < dist) { + dist = src_dist; + rel = src_rel; + } + } + } + + if (solid) { + rel = -rel - ivec2(1); + } + + distance_field = ivec4(rel, 0, 0); +#endif + +#ifdef MODE_STORE + + ivec2 rel = texelFetch(src_process, pos, 0).xy; + + bool solid = rel.x < 0; + + if (solid) { + rel = -rel - ivec2(1); + } + + float d = length(vec2(rel - pos)); + + if (solid) { + d = -d; + } + + d /= SDF_MAX_LENGTH; + d = clamp(d, -1.0, 1.0); + distance_field = float_to_vec4(d*0.5+0.5); + +#endif + +#ifdef MODE_STORE_SHRINK + + ivec2 base = pos << shift; + ivec2 center = base + ivec2(shift); + + ivec2 rel = texelFetch(src_process, pos, 0).xy; + + bool solid = rel.x < 0; + + if (solid) { + rel = -rel - ivec2(1); + } + + float d = length(vec2(rel - center)); + + if (solid) { + d = -d; + } + d /= SDF_MAX_LENGTH; + d = clamp(d, -1.0, 1.0); + distance_field = float_to_vec4(d*0.5+0.5); + +#endif +} diff --git a/drivers/gles3/shaders/canvas_shadow.glsl b/drivers/gles3/shaders/canvas_shadow.glsl deleted file mode 100644 index 94485abd11..0000000000 --- a/drivers/gles3/shaders/canvas_shadow.glsl +++ /dev/null @@ -1,60 +0,0 @@ -/* clang-format off */ -[vertex] - -#ifdef USE_GLES_OVER_GL -#define lowp -#define mediump -#define highp -#else -precision highp float; -precision highp int; -#endif - -layout(location = 0) in highp vec3 vertex; - -uniform highp mat4 projection_matrix; -/* clang-format on */ -uniform highp mat4 light_matrix; -uniform highp mat4 model_matrix; -uniform highp float distance_norm; - -out highp vec4 position_interp; - -void main() { - gl_Position = projection_matrix * (light_matrix * (model_matrix * vec4(vertex, 1.0))); - position_interp = gl_Position; -} - -/* clang-format off */ -[fragment] - -#ifdef USE_GLES_OVER_GL -#define lowp -#define mediump -#define highp -#else -#if defined(USE_HIGHP_PRECISION) -precision highp float; -precision highp int; -#else -precision mediump float; -precision mediump int; -#endif -#endif - -in highp vec4 position_interp; -/* clang-format on */ - -void main() { - highp float depth = ((position_interp.z / position_interp.w) + 1.0) * 0.5 + 0.0; // bias - -#ifdef USE_RGBA_SHADOWS - - highp vec4 comp = fract(depth * vec4(255.0 * 255.0 * 255.0, 255.0 * 255.0, 255.0, 1.0)); - comp -= comp.xxyz * vec4(0.0, 1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0); - frag_color = comp; -#else - - frag_color = vec4(depth); -#endif -} diff --git a/drivers/gles3/shaders/canvas_uniforms_inc.glsl b/drivers/gles3/shaders/canvas_uniforms_inc.glsl index 6b65e09cbf..dd5ebecb1a 100644 --- a/drivers/gles3/shaders/canvas_uniforms_inc.glsl +++ b/drivers/gles3/shaders/canvas_uniforms_inc.glsl @@ -82,6 +82,7 @@ layout(std140) uniform CanvasData { //ubo:0 uint pad2; }; +#ifndef DISABLE_LIGHTING #define LIGHT_FLAGS_BLEND_MASK uint(3 << 16) #define LIGHT_FLAGS_BLEND_MODE_ADD uint(0 << 16) #define LIGHT_FLAGS_BLEND_MODE_SUB uint(1 << 16) @@ -94,6 +95,27 @@ layout(std140) uniform CanvasData { //ubo:0 #define LIGHT_FLAGS_SHADOW_PCF5 uint(1 << 22) #define LIGHT_FLAGS_SHADOW_PCF13 uint(2 << 22) +struct Light { + mat2x4 texture_matrix; //light to texture coordinate matrix (transposed) + mat2x4 shadow_matrix; //light to shadow coordinate matrix (transposed) + vec4 color; + + uint shadow_color; // packed + uint flags; //index to light texture + float shadow_pixel_size; + float height; + + vec2 position; + float shadow_zfar_inv; + float shadow_y_ofs; + + vec4 atlas_rect; +}; + +layout(std140) uniform LightData { //ubo:2 + Light light_array[MAX_LIGHTS]; +}; +#endif // DISABLE_LIGHTING layout(std140) uniform DrawDataInstances { //ubo:3 DrawData draw_data[MAX_DRAW_DATA_INSTANCES]; diff --git a/drivers/gles3/shaders/copy.glsl b/drivers/gles3/shaders/copy.glsl index ca2fc7e36d..796ba79c2e 100644 --- a/drivers/gles3/shaders/copy.glsl +++ b/drivers/gles3/shaders/copy.glsl @@ -2,7 +2,7 @@ #[modes] mode_default = #define MODE_SIMPLE_COPY -mode_copy_section = #define USE_COPY_SECTION +mode_copy_section = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY mode_gaussian_blur = #define MODE_GAUSSIAN_BLUR mode_mipmap = #define MODE_MIPMAP mode_simple_color = #define MODE_SIMPLE_COLOR \n#define USE_COPY_SECTION @@ -25,8 +25,7 @@ void main() { gl_Position = vec4(vertex_attrib, 1.0, 1.0); #ifdef USE_COPY_SECTION - gl_Position.xy = (copy_section.xy + (uv_interp.xy * 0.5 + 0.5) * copy_section.zw) * 2.0 - 1.0; - uv_interp = copy_section.xy + uv_interp * copy_section.zw; + gl_Position.xy = (copy_section.xy + uv_interp.xy * copy_section.zw) * 2.0 - 1.0; #endif } diff --git a/drivers/gles3/shaders/cubemap_filter.glsl b/drivers/gles3/shaders/cubemap_filter.glsl index 88464876f1..6fcb23204d 100644 --- a/drivers/gles3/shaders/cubemap_filter.glsl +++ b/drivers/gles3/shaders/cubemap_filter.glsl @@ -31,7 +31,7 @@ uniform samplerCube source_cube; //texunit:0 uniform int face_id; #ifndef MODE_DIRECT_WRITE -uniform int sample_count; +uniform uint sample_count; uniform vec4 sample_directions_mip[MAX_SAMPLE_COUNT]; uniform float weight; #endif @@ -105,7 +105,7 @@ void main() { T[1] = cross(N, T[0]); T[2] = N; - for (int sample_num = 0; sample_num < sample_count; sample_num++) { + for (uint sample_num = 0u; sample_num < sample_count; sample_num++) { vec4 sample_direction_mip = sample_directions_mip[sample_num]; vec3 L = T * sample_direction_mip.xyz; vec3 val = textureLod(source_cube, L, sample_direction_mip.w).rgb; diff --git a/drivers/gles3/shaders/particles.glsl b/drivers/gles3/shaders/particles.glsl new file mode 100644 index 0000000000..f8741a22ab --- /dev/null +++ b/drivers/gles3/shaders/particles.glsl @@ -0,0 +1,501 @@ +/* clang-format off */ +#[modes] + +mode_default = + +#[specializations] + +MODE_3D = false +USERDATA1_USED = false +USERDATA2_USED = false +USERDATA3_USED = false +USERDATA4_USED = false +USERDATA5_USED = false +USERDATA6_USED = false + +#[vertex] + +#define SDF_MAX_LENGTH 16384.0 + +layout(std140) uniform GlobalShaderUniformData { //ubo:1 + vec4 global_shader_uniforms[MAX_GLOBAL_SHADER_UNIFORMS]; +}; + +// This needs to be outside clang-format so the ubo comment is in the right place +#ifdef MATERIAL_UNIFORMS_USED +layout(std140) uniform MaterialUniforms{ //ubo:2 + +#MATERIAL_UNIFORMS + +}; +#endif + +/* clang-format on */ + +#define MAX_ATTRACTORS 32 + +#define ATTRACTOR_TYPE_SPHERE uint(0) +#define ATTRACTOR_TYPE_BOX uint(1) +#define ATTRACTOR_TYPE_VECTOR_FIELD uint(2) + +struct Attractor { + mat4 transform; + vec4 extents; // Extents or radius. w-channel is padding. + + uint type; + float strength; + float attenuation; + float directionality; +}; + +#define MAX_COLLIDERS 32 + +#define COLLIDER_TYPE_SPHERE uint(0) +#define COLLIDER_TYPE_BOX uint(1) +#define COLLIDER_TYPE_SDF uint(2) +#define COLLIDER_TYPE_HEIGHT_FIELD uint(3) +#define COLLIDER_TYPE_2D_SDF uint(4) + +struct Collider { + mat4 transform; + vec4 extents; // Extents or radius. w-channel is padding. + + uint type; + float scale; + float pad0; + float pad1; +}; + +layout(std140) uniform FrameData { //ubo:0 + bool emitting; + uint cycle; + float system_phase; + float prev_system_phase; + + float explosiveness; + float randomness; + float time; + float delta; + + float particle_size; + float pad0; + float pad1; + float pad2; + + uint random_seed; + uint attractor_count; + uint collider_count; + uint frame; + + mat4 emission_transform; + + Attractor attractors[MAX_ATTRACTORS]; + Collider colliders[MAX_COLLIDERS]; +}; + +#define PARTICLE_FLAG_ACTIVE uint(1) +#define PARTICLE_FLAG_STARTED uint(2) +#define PARTICLE_FLAG_TRAILED uint(4) +#define PARTICLE_FRAME_MASK uint(0xFFFF) +#define PARTICLE_FRAME_SHIFT uint(16) + +// ParticleData +layout(location = 0) in highp vec4 color; +layout(location = 1) in highp vec4 velocity_flags; +layout(location = 2) in highp vec4 custom; +layout(location = 3) in highp vec4 xform_1; +layout(location = 4) in highp vec4 xform_2; +#ifdef MODE_3D +layout(location = 5) in highp vec4 xform_3; +#endif +#ifdef USERDATA1_USED +layout(location = 6) in highp vec4 userdata1; +#endif +#ifdef USERDATA2_USED +layout(location = 7) in highp vec4 userdata2; +#endif +#ifdef USERDATA3_USED +layout(location = 8) in highp vec4 userdata3; +#endif +#ifdef USERDATA4_USED +layout(location = 9) in highp vec4 userdata4; +#endif +#ifdef USERDATA5_USED +layout(location = 10) in highp vec4 userdata5; +#endif +#ifdef USERDATA6_USED +layout(location = 11) in highp vec4 userdata6; +#endif + +out highp vec4 out_color; //tfb: +out highp vec4 out_velocity_flags; //tfb: +out highp vec4 out_custom; //tfb: +out highp vec4 out_xform_1; //tfb: +out highp vec4 out_xform_2; //tfb: +#ifdef MODE_3D +out highp vec4 out_xform_3; //tfb:MODE_3D +#endif +#ifdef USERDATA1_USED +out highp vec4 out_userdata1; //tfb:USERDATA1_USED +#endif +#ifdef USERDATA2_USED +out highp vec4 out_userdata2; //tfb:USERDATA2_USED +#endif +#ifdef USERDATA3_USED +out highp vec4 out_userdata3; //tfb:USERDATA3_USED +#endif +#ifdef USERDATA4_USED +out highp vec4 out_userdata4; //tfb:USERDATA4_USED +#endif +#ifdef USERDATA5_USED +out highp vec4 out_userdata5; //tfb:USERDATA5_USED +#endif +#ifdef USERDATA6_USED +out highp vec4 out_userdata6; //tfb:USERDATA6_USED +#endif + +uniform sampler2D height_field_texture; //texunit:0 + +uniform float lifetime; +uniform bool clear; +uniform uint total_particles; +uniform bool use_fractional_delta; + +uint hash(uint x) { + x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = ((x >> uint(16)) ^ x) * uint(0x45d9f3b); + x = (x >> uint(16)) ^ x; + return x; +} + +vec3 safe_normalize(vec3 direction) { + const float EPSILON = 0.001; + if (length(direction) < EPSILON) { + return vec3(0.0); + } + return normalize(direction); +} + +// Needed whenever 2D sdf texture is read from as it is packed in RGBA8. +float vec4_to_float(vec4 p_vec) { + return dot(p_vec, vec4(1.0 / (255.0 * 255.0 * 255.0), 1.0 / (255.0 * 255.0), 1.0 / 255.0, 1.0)) * 2.0 - 1.0; +} + +#GLOBALS + +void main() { + bool apply_forces = true; + bool apply_velocity = true; + float local_delta = delta; + + float mass = 1.0; + + bool restart = false; + + bool restart_position = false; + bool restart_rotation_scale = false; + bool restart_velocity = false; + bool restart_color = false; + bool restart_custom = false; + + mat4 xform = mat4(1.0); + uint flags = 0u; + + if (clear) { + out_color = vec4(1.0); + out_custom = vec4(0.0); + out_velocity_flags = vec4(0.0); + } else { + out_color = color; + out_velocity_flags = velocity_flags; + out_custom = custom; + xform[0] = xform_1; + xform[1] = xform_2; +#ifdef MODE_3D + xform[2] = xform_3; +#endif + xform = transpose(xform); + flags = floatBitsToUint(velocity_flags.w); + } + + //clear started flag if set + flags &= ~PARTICLE_FLAG_STARTED; + + bool collided = false; + vec3 collision_normal = vec3(0.0); + float collision_depth = 0.0; + + vec3 attractor_force = vec3(0.0); + +#if !defined(DISABLE_VELOCITY) + + if (bool(flags & PARTICLE_FLAG_ACTIVE)) { + xform[3].xyz += out_velocity_flags.xyz * local_delta; + } +#endif + uint index = uint(gl_VertexID); + if (emitting) { + float restart_phase = float(index) / float(total_particles); + + if (randomness > 0.0) { + uint seed = cycle; + if (restart_phase >= system_phase) { + seed -= uint(1); + } + seed *= uint(total_particles); + seed += index; + float random = float(hash(seed) % uint(65536)) / 65536.0; + restart_phase += randomness * random * 1.0 / float(total_particles); + } + + restart_phase *= (1.0 - explosiveness); + + if (system_phase > prev_system_phase) { + // restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed + + if (restart_phase >= prev_system_phase && restart_phase < system_phase) { + restart = true; + if (use_fractional_delta) { + local_delta = (system_phase - restart_phase) * lifetime; + } + } + + } else if (delta > 0.0) { + if (restart_phase >= prev_system_phase) { + restart = true; + if (use_fractional_delta) { + local_delta = (1.0 - restart_phase + system_phase) * lifetime; + } + + } else if (restart_phase < system_phase) { + restart = true; + if (use_fractional_delta) { + local_delta = (system_phase - restart_phase) * lifetime; + } + } + } + + if (restart) { + flags = emitting ? (PARTICLE_FLAG_ACTIVE | PARTICLE_FLAG_STARTED | (cycle << PARTICLE_FRAME_SHIFT)) : 0u; + restart_position = true; + restart_rotation_scale = true; + restart_velocity = true; + restart_color = true; + restart_custom = true; + } + } + + bool particle_active = bool(flags & PARTICLE_FLAG_ACTIVE); + + uint particle_number = (flags >> PARTICLE_FRAME_SHIFT) * uint(total_particles) + index; + + if (restart && particle_active) { +#CODE : START + } + + if (particle_active) { + for (uint i = 0u; i < attractor_count; i++) { + vec3 dir; + float amount; + vec3 rel_vec = xform[3].xyz - attractors[i].transform[3].xyz; + vec3 local_pos = rel_vec * mat3(attractors[i].transform); + + switch (attractors[i].type) { + case ATTRACTOR_TYPE_SPHERE: { + dir = safe_normalize(rel_vec); + float d = length(local_pos) / attractors[i].extents.x; + if (d > 1.0) { + continue; + } + amount = max(0.0, 1.0 - d); + } break; + case ATTRACTOR_TYPE_BOX: { + dir = safe_normalize(rel_vec); + + vec3 abs_pos = abs(local_pos / attractors[i].extents.xyz); + float d = max(abs_pos.x, max(abs_pos.y, abs_pos.z)); + if (d > 1.0) { + continue; + } + amount = max(0.0, 1.0 - d); + + } break; + case ATTRACTOR_TYPE_VECTOR_FIELD: { + } break; + } + amount = pow(amount, attractors[i].attenuation); + dir = safe_normalize(mix(dir, attractors[i].transform[2].xyz, attractors[i].directionality)); + attractor_force -= amount * dir * attractors[i].strength; + } + + float particle_size = particle_size; + +#ifdef USE_COLLISION_SCALE + + particle_size *= dot(vec3(length(xform[0].xyz), length(xform[1].xyz), length(xform[2].xyz)), vec3(0.33333333333)); + +#endif + + if (collider_count == 1u && colliders[0].type == COLLIDER_TYPE_2D_SDF) { + //2D collision + + vec2 pos = xform[3].xy; + vec4 to_sdf_x = colliders[0].transform[0]; + vec4 to_sdf_y = colliders[0].transform[1]; + vec2 sdf_pos = vec2(dot(vec4(pos, 0, 1), to_sdf_x), dot(vec4(pos, 0, 1), to_sdf_y)); + + vec4 sdf_to_screen = vec4(colliders[0].extents.xyz, colliders[0].scale); + + vec2 uv_pos = sdf_pos * sdf_to_screen.xy + sdf_to_screen.zw; + + if (all(greaterThan(uv_pos, vec2(0.0))) && all(lessThan(uv_pos, vec2(1.0)))) { + vec2 pos2 = pos + vec2(0, particle_size); + vec2 sdf_pos2 = vec2(dot(vec4(pos2, 0, 1), to_sdf_x), dot(vec4(pos2, 0, 1), to_sdf_y)); + float sdf_particle_size = distance(sdf_pos, sdf_pos2); + + float d = vec4_to_float(texture(height_field_texture, uv_pos)) * SDF_MAX_LENGTH; + + d -= sdf_particle_size; + + if (d < 0.0) { + const float EPSILON = 0.001; + vec2 n = normalize(vec2( + vec4_to_float(texture(height_field_texture, uv_pos + vec2(EPSILON, 0.0))) - vec4_to_float(texture(height_field_texture, uv_pos - vec2(EPSILON, 0.0))), + vec4_to_float(texture(height_field_texture, uv_pos + vec2(0.0, EPSILON))) - vec4_to_float(texture(height_field_texture, uv_pos - vec2(0.0, EPSILON))))); + + collided = true; + sdf_pos2 = sdf_pos + n * d; + pos2 = vec2(dot(vec4(sdf_pos2, 0, 1), colliders[0].transform[2]), dot(vec4(sdf_pos2, 0, 1), colliders[0].transform[3])); + + n = pos - pos2; + + collision_normal = normalize(vec3(n, 0.0)); + collision_depth = length(n); + } + } + + } else { + for (uint i = 0u; i < collider_count; i++) { + vec3 normal; + float depth; + bool col = false; + + vec3 rel_vec = xform[3].xyz - colliders[i].transform[3].xyz; + vec3 local_pos = rel_vec * mat3(colliders[i].transform); + + switch (colliders[i].type) { + case COLLIDER_TYPE_SPHERE: { + float d = length(rel_vec) - (particle_size + colliders[i].extents.x); + + if (d < 0.0) { + col = true; + depth = -d; + normal = normalize(rel_vec); + } + + } break; + case COLLIDER_TYPE_BOX: { + vec3 abs_pos = abs(local_pos); + vec3 sgn_pos = sign(local_pos); + + if (any(greaterThan(abs_pos, colliders[i].extents.xyz))) { + //point outside box + + vec3 closest = min(abs_pos, colliders[i].extents.xyz); + vec3 rel = abs_pos - closest; + depth = length(rel) - particle_size; + if (depth < 0.0) { + col = true; + normal = mat3(colliders[i].transform) * (normalize(rel) * sgn_pos); + depth = -depth; + } + } else { + //point inside box + vec3 axis_len = colliders[i].extents.xyz - abs_pos; + // there has to be a faster way to do this? + if (all(lessThan(axis_len.xx, axis_len.yz))) { + normal = vec3(1, 0, 0); + } else if (all(lessThan(axis_len.yy, axis_len.xz))) { + normal = vec3(0, 1, 0); + } else { + normal = vec3(0, 0, 1); + } + + col = true; + depth = dot(normal * axis_len, vec3(1)) + particle_size; + normal = mat3(colliders[i].transform) * (normal * sgn_pos); + } + + } break; + case COLLIDER_TYPE_SDF: { + } break; + case COLLIDER_TYPE_HEIGHT_FIELD: { + vec3 local_pos_bottom = local_pos; + local_pos_bottom.y -= particle_size; + + if (any(greaterThan(abs(local_pos_bottom), colliders[i].extents.xyz))) { + continue; + } + const float DELTA = 1.0 / 8192.0; + + vec3 uvw_pos = vec3(local_pos_bottom / colliders[i].extents.xyz) * 0.5 + 0.5; + + float y = 1.0 - texture(height_field_texture, uvw_pos.xz).r; + + if (y > uvw_pos.y) { + //inside heightfield + + vec3 pos1 = (vec3(uvw_pos.x, y, uvw_pos.z) * 2.0 - 1.0) * colliders[i].extents.xyz; + vec3 pos2 = (vec3(uvw_pos.x + DELTA, 1.0 - texture(height_field_texture, uvw_pos.xz + vec2(DELTA, 0)).r, uvw_pos.z) * 2.0 - 1.0) * colliders[i].extents.xyz; + vec3 pos3 = (vec3(uvw_pos.x, 1.0 - texture(height_field_texture, uvw_pos.xz + vec2(0, DELTA)).r, uvw_pos.z + DELTA) * 2.0 - 1.0) * colliders[i].extents.xyz; + + normal = normalize(cross(pos1 - pos2, pos1 - pos3)); + float local_y = (vec3(local_pos / colliders[i].extents.xyz) * 0.5 + 0.5).y; + + col = true; + depth = dot(normal, pos1) - dot(normal, local_pos_bottom); + } + + } break; + } + + if (col) { + if (!collided) { + collided = true; + collision_normal = normal; + collision_depth = depth; + } else { + vec3 c = collision_normal * collision_depth; + c += normal * max(0.0, depth - dot(normal, c)); + collision_normal = normalize(c); + collision_depth = length(c); + } + } + } + } + } + + if (particle_active) { +#CODE : PROCESS + } + + flags &= ~PARTICLE_FLAG_ACTIVE; + if (particle_active) { + flags |= PARTICLE_FLAG_ACTIVE; + } + + xform = transpose(xform); + out_xform_1 = xform[0]; + out_xform_2 = xform[1]; +#ifdef MODE_3D + out_xform_3 = xform[2]; +#endif + out_velocity_flags.w = uintBitsToFloat(flags); +} + +/* clang-format off */ +#[fragment] + +void main() { +} +/* clang-format on */ diff --git a/drivers/gles3/shaders/particles_copy.glsl b/drivers/gles3/shaders/particles_copy.glsl new file mode 100644 index 0000000000..f273cb7b64 --- /dev/null +++ b/drivers/gles3/shaders/particles_copy.glsl @@ -0,0 +1,122 @@ +/* clang-format off */ +#[modes] + +mode_default = + +#[specializations] + +MODE_3D = false + +#[vertex] + +#include "stdlib_inc.glsl" + +// ParticleData +layout(location = 0) in highp vec4 color; +layout(location = 1) in highp vec4 velocity_flags; +layout(location = 2) in highp vec4 custom; +layout(location = 3) in highp vec4 xform_1; +layout(location = 4) in highp vec4 xform_2; +#ifdef MODE_3D +layout(location = 5) in highp vec4 xform_3; +#endif + +/* clang-format on */ +out highp vec4 out_xform_1; //tfb: +out highp vec4 out_xform_2; //tfb: +#ifdef MODE_3D +out highp vec4 out_xform_3; //tfb:MODE_3D +#endif +flat out highp uvec4 instance_color_custom_data; //tfb: + +uniform lowp vec3 sort_direction; +uniform highp float frame_remainder; + +uniform highp vec3 align_up; +uniform highp uint align_mode; + +uniform highp mat4 inv_emission_transform; + +#define TRANSFORM_ALIGN_DISABLED uint(0) +#define TRANSFORM_ALIGN_Z_BILLBOARD uint(1) +#define TRANSFORM_ALIGN_Y_TO_VELOCITY uint(2) +#define TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY uint(3) + +#define PARTICLE_FLAG_ACTIVE uint(1) + +void main() { + mat4 txform = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); // zero scale, becomes invisible. + if (bool(floatBitsToUint(velocity_flags.w) & PARTICLE_FLAG_ACTIVE)) { +#ifdef MODE_3D + txform = transpose(mat4(xform_1, xform_2, xform_3, vec4(0.0, 0.0, 0.0, 1.0))); +#else + txform = transpose(mat4(xform_1, xform_2, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))); +#endif + + switch (align_mode) { + case TRANSFORM_ALIGN_DISABLED: { + } break; //nothing + case TRANSFORM_ALIGN_Z_BILLBOARD: { + mat3 local = mat3(normalize(cross(align_up, sort_direction)), align_up, sort_direction); + local = local * mat3(txform); + txform[0].xyz = local[0]; + txform[1].xyz = local[1]; + txform[2].xyz = local[2]; + + } break; + case TRANSFORM_ALIGN_Y_TO_VELOCITY: { + vec3 v = velocity_flags.xyz; + float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0; + if (length(v) > 0.0) { + txform[1].xyz = normalize(v); + } else { + txform[1].xyz = normalize(txform[1].xyz); + } + + txform[0].xyz = normalize(cross(txform[1].xyz, txform[2].xyz)); + txform[2].xyz = vec3(0.0, 0.0, 1.0) * s; + txform[0].xyz *= s; + txform[1].xyz *= s; + } break; + case TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY: { + vec3 sv = velocity_flags.xyz - sort_direction * dot(sort_direction, velocity_flags.xyz); //screen velocity + float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0; + + if (length(sv) == 0.0) { + sv = align_up; + } + + sv = normalize(sv); + + txform[0].xyz = normalize(cross(sv, sort_direction)) * s; + txform[1].xyz = sv * s; + txform[2].xyz = sort_direction * s; + + } break; + } + + txform[3].xyz += velocity_flags.xyz * frame_remainder; + +#ifndef MODE_3D + // In global mode, bring 2D particles to local coordinates + // as they will be drawn with the node position as origin. + txform = inv_emission_transform * txform; +#endif + + txform = transpose(txform); + } + + instance_color_custom_data = uvec4(packHalf2x16(color.xy), packHalf2x16(color.zw), packHalf2x16(custom.xy), packHalf2x16(custom.zw)); + out_xform_1 = txform[0]; + out_xform_2 = txform[1]; +#ifdef MODE_3D + out_xform_3 = txform[2]; +#endif +} + +/* clang-format off */ +#[fragment] + +void main() { +} +/* clang-format on */ diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index efd6036ba9..adb4562750 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -16,6 +16,7 @@ DISABLE_LIGHT_OMNI = false DISABLE_LIGHT_SPOT = false DISABLE_FOG = false USE_RADIANCE_MAP = true +USE_MULTIVIEW = false #[vertex] @@ -101,7 +102,7 @@ vec3 oct_to_vec3(vec2 e) { vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); float t = max(-v.z, 0.0); v.xy += t * -sign(v.xy); - return v; + return normalize(v); } #ifdef USE_INSTANCING @@ -153,6 +154,15 @@ layout(std140) uniform SceneData { // ubo:2 } scene_data; +#ifdef USE_MULTIVIEW +layout(std140) uniform MultiviewData { // ubo:8 + highp mat4 projection_matrix_view[MAX_VIEWS]; + highp mat4 inv_projection_matrix_view[MAX_VIEWS]; + highp vec4 eye_offset[MAX_VIEWS]; +} +multiview_data; +#endif + uniform highp mat4 world_transform; #ifdef USE_LIGHTMAP @@ -187,7 +197,7 @@ out vec3 tangent_interp; out vec3 binormal_interp; #endif -#if defined(MATERIAL_UNIFORMS_USED) +#ifdef MATERIAL_UNIFORMS_USED /* clang-format off */ layout(std140) uniform MaterialUniforms { // ubo:3 @@ -250,8 +260,14 @@ void main() { #if defined(OVERRIDE_POSITION) highp vec4 position; #endif - highp mat4 projection_matrix = scene_data.projection_matrix; - highp mat4 inv_projection_matrix = scene_data.inv_projection_matrix; + +#ifdef USE_MULTIVIEW + mat4 projection_matrix = multiview_data.projection_matrix_view[ViewIndex]; + mat4 inv_projection_matrix = multiview_data.inv_projection_matrix_view[ViewIndex]; +#else + mat4 projection_matrix = scene_data.projection_matrix; + mat4 inv_projection_matrix = scene_data.inv_projection_matrix; +#endif //USE_MULTIVIEW #ifdef USE_INSTANCING vec4 instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w)); @@ -339,7 +355,6 @@ void main() { /* clang-format off */ #[fragment] - // Default to SPECULAR_SCHLICK_GGX. #if !defined(SPECULAR_DISABLED) && !defined(SPECULAR_SCHLICK_GGX) && !defined(SPECULAR_TOON) #define SPECULAR_SCHLICK_GGX @@ -351,7 +366,9 @@ void main() { #endif #endif +#ifndef MODE_RENDER_DEPTH #include "tonemap_inc.glsl" +#endif #include "stdlib_inc.glsl" /* texture unit usage, N is max_texture_unity-N @@ -413,7 +430,7 @@ layout(std140) uniform GlobalShaderUniformData { //ubo:1 /* Material Uniforms */ -#if defined(MATERIAL_UNIFORMS_USED) +#ifdef MATERIAL_UNIFORMS_USED /* clang-format off */ layout(std140) uniform MaterialUniforms { // ubo:3 @@ -463,6 +480,15 @@ layout(std140) uniform SceneData { // ubo:2 } scene_data; +#ifdef USE_MULTIVIEW +layout(std140) uniform MultiviewData { // ubo:8 + highp mat4 projection_matrix_view[MAX_VIEWS]; + highp mat4 inv_projection_matrix_view[MAX_VIEWS]; + highp vec4 eye_offset[MAX_VIEWS]; +} +multiview_data; +#endif + /* clang-format off */ #GLOBALS @@ -511,7 +537,7 @@ layout(std140) uniform OmniLightData { // ubo:5 LightData omni_lights[MAX_LIGHT_DATA_STRUCTS]; }; uniform uint omni_light_indices[MAX_FORWARD_LIGHTS]; -uniform int omni_light_count; +uniform uint omni_light_count; #endif #ifndef DISABLE_LIGHT_SPOT @@ -521,7 +547,7 @@ layout(std140) uniform SpotLightData { // ubo:6 LightData spot_lights[MAX_LIGHT_DATA_STRUCTS]; }; uniform uint spot_light_indices[MAX_FORWARD_LIGHTS]; -uniform int spot_light_count; +uniform uint spot_light_count; #endif #ifdef USE_ADDITIVE_LIGHTING @@ -530,8 +556,13 @@ uniform highp samplerCubeShadow positional_shadow; // texunit:-4 #endif // !defined(DISABLE_LIGHT_OMNI) && !defined(DISABLE_LIGHT_SPOT) -uniform highp sampler2D screen_texture; // texunit:-5 +#ifdef USE_MULTIVIEW +uniform highp sampler2DArray depth_buffer; // texunit:-6 +uniform highp sampler2DArray screen_texture; // texunit:-5 +#else uniform highp sampler2D depth_buffer; // texunit:-6 +uniform highp sampler2D screen_texture; // texunit:-5 +#endif uniform highp mat4 world_transform; uniform mediump float opaque_prepass_threshold; @@ -884,7 +915,12 @@ vec4 fog_process(vec3 vertex) { void main() { //lay out everything, whatever is unused is optimized away anyway vec3 vertex = vertex_interp; +#ifdef USE_MULTIVIEW + vec3 view = -normalize(vertex_interp - multiview_data.eye_offset[ViewIndex].xyz); +#else vec3 view = -normalize(vertex_interp); +#endif + highp mat4 model_matrix = world_transform; vec3 albedo = vec3(1.0); vec3 backlight = vec3(0.0); vec4 transmittance_color = vec4(0.0, 0.0, 0.0, 1.0); @@ -1064,7 +1100,7 @@ void main() { ref_vec = mix(ref_vec, normal, roughness * roughness); float horizon = min(1.0 + dot(ref_vec, normal), 1.0); ref_vec = scene_data.radiance_inverse_xform * ref_vec; - specular_light = textureLod(radiance_map, ref_vec, roughness * RADIANCE_MAX_LOD).rgb; + specular_light = textureLod(radiance_map, ref_vec, sqrt(roughness) * RADIANCE_MAX_LOD).rgb; specular_light = srgb_to_linear(specular_light); specular_light *= horizon * horizon; specular_light *= scene_data.ambient_light_color_energy.a; @@ -1096,9 +1132,15 @@ void main() { #if defined(CUSTOM_IRRADIANCE_USED) ambient_light = mix(ambient_light, custom_irradiance.rgb, custom_irradiance.a); #endif // CUSTOM_IRRADIANCE_USED - ambient_light *= albedo.rgb; - ambient_light *= ao; + { +#if defined(AMBIENT_LIGHT_DISABLED) + ambient_light = vec3(0.0, 0.0, 0.0); +#else + ambient_light *= albedo.rgb; + ambient_light *= ao; +#endif // AMBIENT_LIGHT_DISABLED + } // convert ao to direct light ao ao = mix(1.0, ao, ao_light_affect); @@ -1120,7 +1162,7 @@ void main() { float a004 = min(r.x * r.x, exp2(-9.28 * ndotv)) * r.x + r.y; vec2 env = vec2(-1.04, 1.04) * a004 + r.zw; - specular_light *= env.x * f0 + env.y * clamp(50.0 * f0.g, 0.0, 1.0); + specular_light *= env.x * f0 + env.y * clamp(50.0 * f0.g, metallic, 1.0); #endif } @@ -1149,7 +1191,7 @@ void main() { #endif //!DISABLE_LIGHT_DIRECTIONAL #ifndef DISABLE_LIGHT_OMNI - for (int i = 0; i < MAX_FORWARD_LIGHTS; i++) { + for (uint i = 0u; i < MAX_FORWARD_LIGHTS; i++) { if (i >= omni_light_count) { break; } @@ -1172,7 +1214,7 @@ void main() { #endif // !DISABLE_LIGHT_OMNI #ifndef DISABLE_LIGHT_SPOT - for (int i = 0; i < MAX_FORWARD_LIGHTS; i++) { + for (uint i = 0u; i < MAX_FORWARD_LIGHTS; i++) { if (i >= spot_light_count) { break; } diff --git a/drivers/gles3/shaders/skeleton.glsl b/drivers/gles3/shaders/skeleton.glsl new file mode 100644 index 0000000000..a1e3c098f4 --- /dev/null +++ b/drivers/gles3/shaders/skeleton.glsl @@ -0,0 +1,269 @@ +/* clang-format off */ +#[modes] + +mode_base_pass = +mode_blend_pass = #define MODE_BLEND_PASS + +#[specializations] + +MODE_2D = true +USE_BLEND_SHAPES = false +USE_SKELETON = false +USE_NORMAL = false +USE_TANGENT = false +FINAL_PASS = false +USE_EIGHT_WEIGHTS = false + +#[vertex] + +#include "stdlib_inc.glsl" + +#ifdef MODE_2D +#define VFORMAT vec2 +#else +#define VFORMAT vec3 +#endif + +#ifdef FINAL_PASS +#define OFORMAT vec2 +#else +#define OFORMAT uvec2 +#endif + +// These come from the source mesh and the output from previous passes. +layout(location = 0) in highp VFORMAT in_vertex; +#ifdef MODE_BLEND_PASS +#ifdef USE_NORMAL +layout(location = 1) in highp uvec2 in_normal; +#endif +#ifdef USE_TANGENT +layout(location = 2) in highp uvec2 in_tangent; +#endif +#else // MODE_BLEND_PASS +#ifdef USE_NORMAL +layout(location = 1) in highp vec2 in_normal; +#endif +#ifdef USE_TANGENT +layout(location = 2) in highp vec2 in_tangent; +#endif +#endif // MODE_BLEND_PASS + +#ifdef USE_SKELETON +#ifdef USE_EIGHT_WEIGHTS +layout(location = 10) in highp uvec4 in_bone_attrib; +layout(location = 11) in highp uvec4 in_bone_attrib2; +layout(location = 12) in mediump vec4 in_weight_attrib; +layout(location = 13) in mediump vec4 in_weight_attrib2; +#else +layout(location = 10) in highp uvec4 in_bone_attrib; +layout(location = 11) in mediump vec4 in_weight_attrib; +#endif + +uniform mediump sampler2D skeleton_texture; // texunit:0 +#endif + +/* clang-format on */ +#ifdef MODE_BLEND_PASS +layout(location = 3) in highp VFORMAT blend_vertex; +#ifdef USE_NORMAL +layout(location = 4) in highp vec2 blend_normal; +#endif +#ifdef USE_TANGENT +layout(location = 5) in highp vec2 blend_tangent; +#endif +#endif // MODE_BLEND_PASS + +out highp VFORMAT out_vertex; //tfb: + +#ifdef USE_NORMAL +flat out highp OFORMAT out_normal; //tfb:USE_NORMAL +#endif +#ifdef USE_TANGENT +flat out highp OFORMAT out_tangent; //tfb:USE_TANGENT +#endif + +#ifdef USE_BLEND_SHAPES +uniform highp float blend_weight; +uniform lowp float blend_shape_count; +#endif + +vec2 signNotZero(vec2 v) { + return mix(vec2(-1.0), vec2(1.0), greaterThanEqual(v.xy, vec2(0.0))); +} + +vec3 oct_to_vec3(vec2 oct) { + oct = oct * 2.0 - 1.0; + vec3 v = vec3(oct.xy, 1.0 - abs(oct.x) - abs(oct.y)); + if (v.z < 0.0) { + v.xy = (1.0 - abs(v.yx)) * signNotZero(v.xy); + } + return normalize(v); +} + +vec2 vec3_to_oct(vec3 e) { + e /= abs(e.x) + abs(e.y) + abs(e.z); + vec2 oct = e.z >= 0.0f ? e.xy : (vec2(1.0f) - abs(e.yx)) * signNotZero(e.xy); + return oct * 0.5f + 0.5f; +} + +vec4 oct_to_tang(vec2 oct_sign_encoded) { + // Binormal sign encoded in y component + vec2 oct = vec2(oct_sign_encoded.x, abs(oct_sign_encoded.y) * 2.0 - 1.0); + return vec4(oct_to_vec3(oct), sign(oct_sign_encoded.y)); +} + +vec2 tang_to_oct(vec4 base) { + vec2 oct = vec3_to_oct(base.xyz); + // Encode binormal sign in y component + oct.y = oct.y * 0.5f + 0.5f; + oct.y = base.w >= 0.0f ? oct.y : 1.0 - oct.y; + return oct; +} + +// Our original input for normals and tangents is 2 16-bit floats. +// Transform Feedback has to write out 32-bits per channel. +// Octahedral compression requires normalized vectors, but we need to store +// non-normalized vectors until the very end. +// Therefore, we will compress our normals into 16 bits using signed-normalized +// fixed point precision. This works well, because we know that each normal +// is no larger than |1| so we can normalize by dividing by the number of blend +// shapes. +uvec2 vec4_to_vec2(vec4 p_vec) { + return uvec2(packSnorm2x16(p_vec.xy), packSnorm2x16(p_vec.zw)); +} + +vec4 vec2_to_vec4(uvec2 p_vec) { + return vec4(unpackSnorm2x16(p_vec.x), unpackSnorm2x16(p_vec.y)); +} + +void main() { +#ifdef MODE_2D + out_vertex = in_vertex; + +#ifdef USE_BLEND_SHAPES +#ifdef MODE_BLEND_PASS + out_vertex = in_vertex + blend_vertex * blend_weight; +#else + out_vertex = in_vertex * blend_weight; +#endif +#ifdef FINAL_PASS + out_vertex = normalize(out_vertex); +#endif +#endif // USE_BLEND_SHAPES + +#ifdef USE_SKELETON + +#define TEX(m) texelFetch(skeleton_texture, ivec2(m % 256u, m / 256u), 0) +#define GET_BONE_MATRIX(a, b, w) mat2x4(TEX(a), TEX(b)) * w + + uvec4 bones = in_bone_attrib * uvec4(2u); + uvec4 bones_a = bones + uvec4(1u); + + highp mat2x4 m = GET_BONE_MATRIX(bones.x, bones_a.x, in_weight_attrib.x); + m += GET_BONE_MATRIX(bones.y, bones_a.y, in_weight_attrib.y); + m += GET_BONE_MATRIX(bones.z, bones_a.z, in_weight_attrib.z); + m += GET_BONE_MATRIX(bones.w, bones_a.w, in_weight_attrib.w); + + mat4 bone_matrix = mat4(m[0], m[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + + //reverse order because its transposed + out_vertex = (vec4(out_vertex, 0.0, 1.0) * bone_matrix).xy; +#endif // USE_SKELETON + +#else // MODE_2D + +#ifdef USE_BLEND_SHAPES +#ifdef MODE_BLEND_PASS + out_vertex = in_vertex + blend_vertex * blend_weight; + +#ifdef USE_NORMAL + vec3 normal = vec2_to_vec4(in_normal).xyz * blend_shape_count; + vec3 normal_blend = oct_to_vec3(blend_normal) * blend_weight; +#ifdef FINAL_PASS + out_normal = vec3_to_oct(normalize(normal + normal_blend)); +#else + out_normal = vec4_to_vec2(vec4(normal + normal_blend, 0.0) / blend_shape_count); +#endif +#endif // USE_NORMAL + +#ifdef USE_TANGENT + vec4 tangent = vec2_to_vec4(in_tangent) * blend_shape_count; + vec4 tangent_blend = oct_to_tang(blend_tangent) * blend_weight; +#ifdef FINAL_PASS + out_tangent = tang_to_oct(vec4(normalize(tangent.xyz + tangent_blend.xyz), tangent.w)); +#else + out_tangent = vec4_to_vec2(vec4((tangent.xyz + tangent_blend.xyz) / blend_shape_count, tangent.w)); +#endif +#endif // USE_TANGENT + +#else // MODE_BLEND_PASS + out_vertex = in_vertex * blend_weight; + +#ifdef USE_NORMAL + vec3 normal = oct_to_vec3(in_normal); + out_normal = vec4_to_vec2(vec4(normal * blend_weight / blend_shape_count, 0.0)); +#endif +#ifdef USE_TANGENT + vec4 tangent = oct_to_tang(in_tangent); + out_tangent = vec4_to_vec2(vec4(tangent.rgb * blend_weight / blend_shape_count, tangent.w)); +#endif +#endif // MODE_BLEND_PASS +#else // USE_BLEND_SHAPES + + // Make attributes available to the skeleton shader if not written by blend shapes. + out_vertex = in_vertex; +#ifdef USE_NORMAL + out_normal = in_normal; +#endif +#ifdef USE_TANGENT + out_tangent = in_tangent; +#endif +#endif // USE_BLEND_SHAPES + +#ifdef USE_SKELETON + +#define TEX(m) texelFetch(skeleton_texture, ivec2(m % 256u, m / 256u), 0) +#define GET_BONE_MATRIX(a, b, c, w) mat4(TEX(a), TEX(b), TEX(c), vec4(0.0, 0.0, 0.0, 1.0)) * w + + uvec4 bones = in_bone_attrib * uvec4(3); + uvec4 bones_a = bones + uvec4(1); + uvec4 bones_b = bones + uvec4(2); + + highp mat4 m; + m = GET_BONE_MATRIX(bones.x, bones_a.x, bones_b.x, in_weight_attrib.x); + m += GET_BONE_MATRIX(bones.y, bones_a.y, bones_b.y, in_weight_attrib.y); + m += GET_BONE_MATRIX(bones.z, bones_a.z, bones_b.z, in_weight_attrib.z); + m += GET_BONE_MATRIX(bones.w, bones_a.w, bones_b.w, in_weight_attrib.w); + +#ifdef USE_EIGHT_WEIGHTS + bones = in_bone_attrib2 * uvec4(3); + bones_a = bones + uvec4(1); + bones_b = bones + uvec4(2); + + m += GET_BONE_MATRIX(bones.x, bones_a.x, bones_b.x, in_weight_attrib2.x); + m += GET_BONE_MATRIX(bones.y, bones_a.y, bones_b.y, in_weight_attrib2.y); + m += GET_BONE_MATRIX(bones.z, bones_a.z, bones_b.z, in_weight_attrib2.z); + m += GET_BONE_MATRIX(bones.w, bones_a.w, bones_b.w, in_weight_attrib2.w); +#endif + + // Reverse order because its transposed. + out_vertex = (vec4(out_vertex, 1.0) * m).xyz; +#ifdef USE_NORMAL + vec3 vertex_normal = oct_to_vec3(out_normal); + out_normal = vec3_to_oct(normalize((vec4(vertex_normal, 0.0) * m).xyz)); +#endif // USE_NORMAL +#ifdef USE_TANGENT + vec4 vertex_tangent = oct_to_tang(out_tangent); + out_tangent = tang_to_oct(vec4(normalize((vec4(vertex_tangent.xyz, 0.0) * m).xyz), vertex_tangent.w)); +#endif // USE_TANGENT +#endif // USE_SKELETON +#endif // MODE_2D +} + +/* clang-format off */ +#[fragment] + +void main() { + +} +/* clang-format on */ diff --git a/drivers/gles3/shaders/stdlib_inc.glsl b/drivers/gles3/shaders/stdlib_inc.glsl index d5051760d7..d819940b1d 100644 --- a/drivers/gles3/shaders/stdlib_inc.glsl +++ b/drivers/gles3/shaders/stdlib_inc.glsl @@ -38,7 +38,6 @@ vec2 unpackSnorm2x16(uint p) { vec2 v = vec2(float(p & uint(0xffff)), float(p >> uint(16))); return clamp((v - 32767.0) * vec2(0.00003051851), vec2(-1.0), vec2(1.0)); } -#endif uint packUnorm4x8(vec4 v) { uvec4 uv = uvec4(round(clamp(v, vec4(0.0), vec4(1.0)) * 255.0)); @@ -58,3 +57,5 @@ vec4 unpackSnorm4x8(uint p) { vec4 v = vec4(float(p & uint(0xff)), float((p >> uint(8)) & uint(0xff)), float((p >> uint(16)) & uint(0xff)), float(p >> uint(24))); return clamp((v - vec4(127.0)) * vec4(0.00787401574), vec4(-1.0), vec4(1.0)); } + +#endif diff --git a/drivers/gles3/shaders/tonemap.glsl b/drivers/gles3/shaders/tonemap.glsl index a478cf9170..0b769e77f2 100644 --- a/drivers/gles3/shaders/tonemap.glsl +++ b/drivers/gles3/shaders/tonemap.glsl @@ -44,7 +44,11 @@ in vec2 uv_interp; layout(location = 0) out vec4 frag_color; +#ifdef USE_MULTIVIEW +uniform highp sampler2DArray source; //texunit:0 +#else uniform highp sampler2D source; //texunit:0 +#endif #if defined(USE_GLOW_LEVEL1) || defined(USE_GLOW_LEVEL2) || defined(USE_GLOW_LEVEL3) || defined(USE_GLOW_LEVEL4) || defined(USE_GLOW_LEVEL5) || defined(USE_GLOW_LEVEL6) || defined(USE_GLOW_LEVEL7) #define USING_GLOW // only use glow when at least one glow level is selected @@ -191,10 +195,17 @@ vec3 apply_fxaa(vec3 color, vec2 uv_interp, vec2 pixel_size) { const float FXAA_REDUCE_MUL = (1.0 / 8.0); const float FXAA_SPAN_MAX = 8.0; +#ifdef USE_MULTIVIEW + vec3 rgbNW = textureLod(source, vec3(uv_interp + vec2(-1.0, -1.0) * pixel_size, ViewIndex), 0.0).xyz; + vec3 rgbNE = textureLod(source, vec3(uv_interp + vec2(1.0, -1.0) * pixel_size, ViewIndex), 0.0).xyz; + vec3 rgbSW = textureLod(source, vec3(uv_interp + vec2(-1.0, 1.0) * pixel_size, ViewIndex), 0.0).xyz; + vec3 rgbSE = textureLod(source, vec3(uv_interp + vec2(1.0, 1.0) * pixel_size, ViewIndex), 0.0).xyz; +#else vec3 rgbNW = textureLod(source, uv_interp + vec2(-1.0, -1.0) * pixel_size, 0.0).xyz; vec3 rgbNE = textureLod(source, uv_interp + vec2(1.0, -1.0) * pixel_size, 0.0).xyz; vec3 rgbSW = textureLod(source, uv_interp + vec2(-1.0, 1.0) * pixel_size, 0.0).xyz; vec3 rgbSE = textureLod(source, uv_interp + vec2(1.0, 1.0) * pixel_size, 0.0).xyz; +#endif vec3 rgbM = color; vec3 luma = vec3(0.299, 0.587, 0.114); float lumaNW = dot(rgbNW, luma); @@ -219,8 +230,13 @@ vec3 apply_fxaa(vec3 color, vec2 uv_interp, vec2 pixel_size) { dir * rcpDirMin)) * pixel_size; +#ifdef USE_MULTIVIEW + vec3 rgbA = 0.5 * (textureLod(source, vec3(uv_interp + dir * (1.0 / 3.0 - 0.5), ViewIndex), 0.0).xyz + textureLod(source, vec3(uv_interp + dir * (2.0 / 3.0 - 0.5), ViewIndex), 0.0).xyz); + vec3 rgbB = rgbA * 0.5 + 0.25 * (textureLod(source, vec3(uv_interp + dir * -0.5, ViewIndex), 0.0).xyz + textureLod(source, vec3(uv_interp + dir * 0.5, ViewIndex), 0.0).xyz); +#else vec3 rgbA = 0.5 * (textureLod(source, uv_interp + dir * (1.0 / 3.0 - 0.5), 0.0).xyz + textureLod(source, uv_interp + dir * (2.0 / 3.0 - 0.5), 0.0).xyz); vec3 rgbB = rgbA * 0.5 + 0.25 * (textureLod(source, uv_interp + dir * -0.5, 0.0).xyz + textureLod(source, uv_interp + dir * 0.5, 0.0).xyz); +#endif float lumaB = dot(rgbB, luma); if ((lumaB < lumaMin) || (lumaB > lumaMax)) { @@ -231,7 +247,11 @@ vec3 apply_fxaa(vec3 color, vec2 uv_interp, vec2 pixel_size) { } void main() { +#ifdef USE_MULTIVIEW + vec4 color = textureLod(source, vec3(uv_interp, ViewIndex), 0.0); +#else vec4 color = textureLod(source, uv_interp, 0.0); +#endif #ifdef USE_FXAA color.rgb = apply_fxaa(color.rgb, uv_interp, pixel_size); diff --git a/drivers/gles3/storage/config.cpp b/drivers/gles3/storage/config.cpp index 242c1ce0a9..9b496c0999 100644 --- a/drivers/gles3/storage/config.cpp +++ b/drivers/gles3/storage/config.cpp @@ -34,6 +34,15 @@ #include "core/config/project_settings.h" #include "core/templates/vector.h" +#ifdef ANDROID_ENABLED +#include <GLES3/gl3.h> +#include <GLES3/gl3ext.h> +#include <GLES3/gl3platform.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> +#endif + using namespace GLES3; #define _GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF @@ -62,7 +71,7 @@ Config::Config() { s3tc_supported = true; rgtc_supported = true; //RGTC - core since OpenGL version 3.0 #else - float_texture_supported = extensions.has("GL_ARB_texture_float") || extensions.has("GL_OES_texture_float"); + float_texture_supported = extensions.has("GL_EXT_color_buffer_float"); etc2_supported = true; #if defined(ANDROID_ENABLED) || defined(IOS_ENABLED) // Some Android devices report support for S3TC but we don't expect that and don't export the textures. @@ -75,28 +84,29 @@ Config::Config() { rgtc_supported = extensions.has("GL_EXT_texture_compression_rgtc") || extensions.has("GL_ARB_texture_compression_rgtc") || extensions.has("EXT_texture_compression_rgtc"); #endif -#ifdef GLES_OVER_GL - use_rgba_2d_shadows = false; -#else - use_rgba_2d_shadows = !(float_texture_supported && extensions.has("GL_EXT_texture_rg")); -#endif - glGetIntegerv(GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, &max_vertex_texture_image_units); glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_texture_image_units); glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size); + glGetIntegerv(GL_MAX_VIEWPORT_DIMS, max_viewport_size); glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_offset_alignment); - // the use skeleton software path should be used if either float texture is not supported, - // OR max_vertex_texture_image_units is zero - use_skeleton_software = (float_texture_supported == false) || (max_vertex_texture_image_units == 0); - support_anisotropic_filter = extensions.has("GL_EXT_texture_filter_anisotropic"); if (support_anisotropic_filter) { glGetFloatv(_GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &anisotropic_level); - anisotropic_level = MIN(float(1 << int(ProjectSettings::get_singleton()->get("rendering/textures/default_filters/anisotropic_filtering_level"))), anisotropic_level); + anisotropic_level = MIN(float(1 << int(GLOBAL_GET("rendering/textures/default_filters/anisotropic_filtering_level"))), anisotropic_level); + } + + multiview_supported = extensions.has("GL_OVR_multiview2") || extensions.has("GL_OVR_multiview"); +#ifdef ANDROID_ENABLED + if (multiview_supported) { + eglFramebufferTextureMultiviewOVR = (PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)eglGetProcAddress("glFramebufferTextureMultiviewOVR"); + if (eglFramebufferTextureMultiviewOVR == nullptr) { + multiview_supported = false; + } } +#endif force_vertex_shading = false; //GLOBAL_GET("rendering/quality/shading/force_vertex_shading"); use_nearest_mip_filter = GLOBAL_GET("rendering/textures/default_filters/use_nearest_mipmap_filter"); diff --git a/drivers/gles3/storage/config.h b/drivers/gles3/storage/config.h index fe18345775..87202fde84 100644 --- a/drivers/gles3/storage/config.h +++ b/drivers/gles3/storage/config.h @@ -44,6 +44,10 @@ #include OPENGL_INCLUDE_H #endif +#ifdef ANDROID_ENABLED +typedef void (*PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)(GLenum, GLenum, GLuint, GLint, GLint, GLsizei); +#endif + namespace GLES3 { class Config { @@ -52,13 +56,12 @@ private: public: bool use_nearest_mip_filter = false; - bool use_skeleton_software = false; bool use_depth_prepass = true; - bool use_rgba_2d_shadows = false; int max_vertex_texture_image_units = 0; int max_texture_image_units = 0; int max_texture_size = 0; + int max_viewport_size[2] = { 0, 0 }; int max_uniform_buffer_size = 0; int max_renderable_elements = 0; int max_renderable_lights = 0; @@ -82,6 +85,11 @@ public: bool support_anisotropic_filter = false; float anisotropic_level = 0.0f; + bool multiview_supported = false; +#ifdef ANDROID_ENABLED + PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC eglFramebufferTextureMultiviewOVR = nullptr; +#endif + static Config *get_singleton() { return singleton; }; Config(); diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index 8e6009c943..d413c2b00e 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -34,6 +34,7 @@ #include "config.h" #include "material_storage.h" +#include "particles_storage.h" #include "texture_storage.h" #include "drivers/gles3/rasterizer_canvas_gles3.h" @@ -89,7 +90,7 @@ _FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataTy gui[j + 3] = 0; // ignored } } else { - int v = value; + uint32_t v = value; gui[0] = v & 1 ? 1 : 0; gui[1] = v & 2 ? 1 : 0; } @@ -116,7 +117,7 @@ _FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataTy gui[j + 3] = 0; // ignored } } else { - int v = value; + uint32_t v = value; gui[0] = (v & 1) ? 1 : 0; gui[1] = (v & 2) ? 1 : 0; gui[2] = (v & 4) ? 1 : 0; @@ -145,7 +146,7 @@ _FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataTy } } } else { - int v = value; + uint32_t v = value; gui[0] = (v & 1) ? 1 : 0; gui[1] = (v & 2) ? 1 : 0; gui[2] = (v & 4) ? 1 : 0; @@ -728,7 +729,7 @@ _FORCE_INLINE_ static void _fill_std140_ubo_value(ShaderLanguage::DataType type, switch (type) { case ShaderLanguage::TYPE_BOOL: { uint32_t *gui = (uint32_t *)data; - *gui = value[0].boolean ? 1 : 0; + gui[0] = value[0].boolean ? 1 : 0; } break; case ShaderLanguage::TYPE_BVEC2: { uint32_t *gui = (uint32_t *)data; @@ -897,7 +898,9 @@ _FORCE_INLINE_ static void _fill_std140_ubo_empty(ShaderLanguage::DataType type, case ShaderLanguage::TYPE_BVEC3: case ShaderLanguage::TYPE_IVEC3: case ShaderLanguage::TYPE_UVEC3: - case ShaderLanguage::TYPE_VEC3: + case ShaderLanguage::TYPE_VEC3: { + memset(data, 0, 12 * p_array_size); + } break; case ShaderLanguage::TYPE_BVEC4: case ShaderLanguage::TYPE_IVEC4: case ShaderLanguage::TYPE_UVEC4: @@ -1340,13 +1343,13 @@ MaterialStorage::MaterialStorage() { shader_data_request_func[RS::SHADER_SPATIAL] = _create_scene_shader_func; shader_data_request_func[RS::SHADER_CANVAS_ITEM] = _create_canvas_shader_func; - shader_data_request_func[RS::SHADER_PARTICLES] = nullptr; + shader_data_request_func[RS::SHADER_PARTICLES] = _create_particles_shader_func; shader_data_request_func[RS::SHADER_SKY] = _create_sky_shader_func; shader_data_request_func[RS::SHADER_FOG] = nullptr; material_data_request_func[RS::SHADER_SPATIAL] = _create_scene_material_func; material_data_request_func[RS::SHADER_CANVAS_ITEM] = _create_canvas_material_func; - material_data_request_func[RS::SHADER_PARTICLES] = nullptr; + material_data_request_func[RS::SHADER_PARTICLES] = _create_particles_material_func; material_data_request_func[RS::SHADER_SKY] = _create_sky_material_func; material_data_request_func[RS::SHADER_FOG] = nullptr; @@ -1611,32 +1614,32 @@ MaterialStorage::MaterialStorage() { { // Setup Particles compiler - /* -ShaderCompiler::DefaultIdentifierActions actions; - actions.renames["COLOR"] = "PARTICLE.color"; - actions.renames["VELOCITY"] = "PARTICLE.velocity"; + ShaderCompiler::DefaultIdentifierActions actions; + + actions.renames["COLOR"] = "out_color"; + actions.renames["VELOCITY"] = "out_velocity_flags.xyz"; //actions.renames["MASS"] = "mass"; ? actions.renames["ACTIVE"] = "particle_active"; actions.renames["RESTART"] = "restart"; - actions.renames["CUSTOM"] = "PARTICLE.custom"; - for (int i = 0; i < ParticlesShader::MAX_USERDATAS; i++) { + actions.renames["CUSTOM"] = "out_custom"; + for (int i = 0; i < PARTICLES_MAX_USERDATAS; i++) { String udname = "USERDATA" + itos(i + 1); - actions.renames[udname] = "PARTICLE.userdata" + itos(i + 1); + actions.renames[udname] = "out_userdata" + itos(i + 1); actions.usage_defines[udname] = "#define USERDATA" + itos(i + 1) + "_USED\n"; } - actions.renames["TRANSFORM"] = "PARTICLE.xform"; - actions.renames["TIME"] = "frame_history.data[0].time"; + actions.renames["TRANSFORM"] = "xform"; + actions.renames["TIME"] = "time"; actions.renames["PI"] = _MKSTR(Math_PI); actions.renames["TAU"] = _MKSTR(Math_TAU); actions.renames["E"] = _MKSTR(Math_E); - actions.renames["LIFETIME"] = "params.lifetime"; + actions.renames["LIFETIME"] = "lifetime"; actions.renames["DELTA"] = "local_delta"; actions.renames["NUMBER"] = "particle_number"; actions.renames["INDEX"] = "index"; //actions.renames["GRAVITY"] = "current_gravity"; - actions.renames["EMISSION_TRANSFORM"] = "FRAME.emission_transform"; - actions.renames["RANDOM_SEED"] = "FRAME.random_seed"; + actions.renames["EMISSION_TRANSFORM"] = "emission_transform"; + actions.renames["RANDOM_SEED"] = "random_seed"; actions.renames["FLAG_EMIT_POSITION"] = "EMISSION_FLAG_HAS_POSITION"; actions.renames["FLAG_EMIT_ROT_SCALE"] = "EMISSION_FLAG_HAS_ROTATION_SCALE"; actions.renames["FLAG_EMIT_VELOCITY"] = "EMISSION_FLAG_HAS_VELOCITY"; @@ -1658,18 +1661,10 @@ ShaderCompiler::DefaultIdentifierActions actions; actions.render_mode_defines["keep_data"] = "#define ENABLE_KEEP_DATA\n"; actions.render_mode_defines["collision_use_scale"] = "#define USE_COLLISION_SCALE\n"; - actions.sampler_array_name = "material_samplers"; - actions.base_texture_binding_index = 1; - actions.texture_layout_set = 3; - actions.base_uniform_string = "material."; - actions.base_varying_index = 10; - actions.default_filter = ShaderLanguage::FILTER_LINEAR_MIPMAP; actions.default_repeat = ShaderLanguage::REPEAT_ENABLE; - actions.global_buffer_array_variable = "global_shader_uniforms.data"; - particles_shader.compiler.initialize(actions); - */ + shaders.compiler_particles.initialize(actions); } { @@ -2212,7 +2207,7 @@ void MaterialStorage::global_shader_parameters_load_settings(bool p_load_texture for (const PropertyInfo &E : settings) { if (E.name.begins_with("shader_globals/")) { StringName name = E.name.get_slice("/", 1); - Dictionary d = ProjectSettings::get_singleton()->get(E.name); + Dictionary d = GLOBAL_GET(E.name); ERR_CONTINUE(!d.has("type")); ERR_CONTINUE(!d.has("value")); @@ -2312,7 +2307,7 @@ void MaterialStorage::global_shader_parameters_instance_free(RID p_instance) { global_shader_uniforms.instance_buffer_pos.erase(p_instance); } -void MaterialStorage::global_shader_parameters_instance_update(RID p_instance, int p_index, const Variant &p_value) { +void MaterialStorage::global_shader_parameters_instance_update(RID p_instance, int p_index, const Variant &p_value, int p_flags_count) { if (!global_shader_uniforms.instance_buffer_pos.has(p_instance)) { return; //just not allocated, ignore } @@ -2322,7 +2317,9 @@ void MaterialStorage::global_shader_parameters_instance_update(RID p_instance, i return; //again, not allocated, ignore } ERR_FAIL_INDEX(p_index, ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES); - ERR_FAIL_COND_MSG(p_value.get_type() > Variant::COLOR, "Unsupported variant type for instance parameter: " + Variant::get_type_name(p_value.get_type())); //anything greater not supported + + Variant::Type value_type = p_value.get_type(); + ERR_FAIL_COND_MSG(p_value.get_type() > Variant::COLOR, "Unsupported variant type for instance parameter: " + Variant::get_type_name(value_type)); //anything greater not supported ShaderLanguage::DataType datatype_from_value[Variant::COLOR + 1] = { ShaderLanguage::TYPE_MAX, //nil @@ -2348,9 +2345,24 @@ void MaterialStorage::global_shader_parameters_instance_update(RID p_instance, i ShaderLanguage::TYPE_VEC4 //color }; - ShaderLanguage::DataType datatype = datatype_from_value[p_value.get_type()]; + ShaderLanguage::DataType datatype = ShaderLanguage::TYPE_MAX; + if (value_type == Variant::INT && p_flags_count > 0) { + switch (p_flags_count) { + case 1: + datatype = ShaderLanguage::TYPE_BVEC2; + break; + case 2: + datatype = ShaderLanguage::TYPE_BVEC3; + break; + case 3: + datatype = ShaderLanguage::TYPE_BVEC4; + break; + } + } else { + datatype = datatype_from_value[value_type]; + } - ERR_FAIL_COND_MSG(datatype == ShaderLanguage::TYPE_MAX, "Unsupported variant type for instance parameter: " + Variant::get_type_name(p_value.get_type())); //anything greater not supported + ERR_FAIL_COND_MSG(datatype == ShaderLanguage::TYPE_MAX, "Unsupported variant type for instance parameter: " + Variant::get_type_name(value_type)); //anything greater not supported pos += p_index; @@ -2451,8 +2463,8 @@ void MaterialStorage::shader_set_code(RID p_shader, const String &p_code) { RS::ShaderMode new_mode; if (mode_string == "canvas_item") { new_mode = RS::SHADER_CANVAS_ITEM; - //} else if (mode_string == "particles") { - // new_mode = RS::SHADER_PARTICLES; + } else if (mode_string == "particles") { + new_mode = RS::SHADER_PARTICLES; } else if (mode_string == "spatial") { new_mode = RS::SHADER_SPATIAL; } else if (mode_string == "sky") { @@ -2523,6 +2535,9 @@ void MaterialStorage::shader_set_path_hint(RID p_shader, const String &p_path) { ERR_FAIL_COND(!shader); shader->path_hint = p_path; + if (shader->data) { + shader->data->set_path_hint(p_path); + } } String MaterialStorage::shader_get_code(RID p_shader) const { @@ -2790,6 +2805,10 @@ void MaterialStorage::material_update_dependency(RID p_material, DependencyTrack /* Canvas Shader Data */ +void CanvasShaderData::set_path_hint(const String &p_path) { + path = p_path; +} + void CanvasShaderData::set_code(const String &p_code) { // compile the shader @@ -2988,7 +3007,7 @@ GLES3::ShaderData *GLES3::_create_canvas_shader_func() { } void CanvasMaterialData::update_parameters(const HashMap<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { - return update_parameters_internal(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size); + update_parameters_internal(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size); } void CanvasMaterialData::bind_uniforms() { @@ -3024,6 +3043,10 @@ GLES3::MaterialData *GLES3::_create_canvas_material_func(ShaderData *p_shader) { //////////////////////////////////////////////////////////////////////////////// // SKY SHADER +void SkyShaderData::set_path_hint(const String &p_path) { + path = p_path; +} + void SkyShaderData::set_code(const String &p_code) { //compile @@ -3232,7 +3255,7 @@ GLES3::ShaderData *GLES3::_create_sky_shader_func() { void SkyMaterialData::update_parameters(const HashMap<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { uniform_set_updated = true; - return update_parameters_internal(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size); + update_parameters_internal(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size); } SkyMaterialData::~SkyMaterialData() { @@ -3267,6 +3290,10 @@ void SkyMaterialData::bind_uniforms() { //////////////////////////////////////////////////////////////////////////////// // Scene SHADER +void SceneShaderData::set_path_hint(const String &p_path) { + path = p_path; +} + void SceneShaderData::set_code(const String &p_code) { //compile @@ -3339,6 +3366,9 @@ void SceneShaderData::set_code(const String &p_code) { actions.usage_flag_pointers["ALPHA"] = &uses_alpha; actions.usage_flag_pointers["ALPHA_SCISSOR_THRESHOLD"] = &uses_alpha_clip; + // Use alpha clip pipeline for alpha hash/dither. + // This prevents sorting issues inherent to alpha blending and allows such materials to cast shadows. + actions.usage_flag_pointers["ALPHA_HASH_SCALE"] = &uses_alpha_clip; actions.render_mode_flags["depth_prepass_alpha"] = &uses_depth_pre_pass; actions.usage_flag_pointers["SSS_STRENGTH"] = &uses_sss; @@ -3399,6 +3429,8 @@ void SceneShaderData::set_code(const String &p_code) { vertex_input_mask |= uses_bones << 9; vertex_input_mask |= uses_weights << 10; uses_screen_texture_mipmaps = gen_code.uses_screen_texture_mipmaps; + uses_vertex_time = gen_code.uses_vertex_time; + uses_fragment_time = gen_code.uses_fragment_time; #if 0 print_line("**compiling shader:"); @@ -3519,11 +3551,15 @@ bool SceneShaderData::is_parameter_texture(const StringName &p_param) const { } bool SceneShaderData::is_animated() const { - return false; + return (uses_fragment_time && uses_discard) || (uses_vertex_time && uses_vertex); } bool SceneShaderData::casts_shadows() const { - return false; + bool has_read_screen_alpha = uses_screen_texture || uses_depth_texture || uses_normal_texture; + bool has_base_alpha = (uses_alpha && !uses_alpha_clip) || has_read_screen_alpha; + bool has_alpha = has_base_alpha || uses_blend_alpha; + + return !has_alpha || (uses_depth_pre_pass && !(depth_draw == DEPTH_DRAW_DISABLED || depth_test == DEPTH_TEST_DISABLED)); } Variant SceneShaderData::get_default_parameter(const StringName &p_parameter) const { @@ -3564,7 +3600,7 @@ void SceneMaterialData::set_next_pass(RID p_pass) { } void SceneMaterialData::update_parameters(const HashMap<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { - return update_parameters_internal(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size); + update_parameters_internal(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size); } SceneMaterialData::~SceneMaterialData() { @@ -3597,4 +3633,209 @@ void SceneMaterialData::bind_uniforms() { } } +/* Particles SHADER */ + +void ParticlesShaderData::set_path_hint(const String &p_path) { + path = p_path; +} + +void ParticlesShaderData::set_code(const String &p_code) { + //compile + + code = p_code; + valid = false; + ubo_size = 0; + uniforms.clear(); + uses_collision = false; + + if (code.is_empty()) { + return; //just invalid, but no error + } + + ShaderCompiler::GeneratedCode gen_code; + ShaderCompiler::IdentifierActions actions; + actions.entry_point_stages["start"] = ShaderCompiler::STAGE_VERTEX; + actions.entry_point_stages["process"] = ShaderCompiler::STAGE_VERTEX; + + actions.usage_flag_pointers["COLLIDED"] = &uses_collision; + + userdata_count = 0; + for (uint32_t i = 0; i < PARTICLES_MAX_USERDATAS; i++) { + userdatas_used[i] = false; + actions.usage_flag_pointers["USERDATA" + itos(i + 1)] = &userdatas_used[i]; + } + + actions.uniforms = &uniforms; + + Error err = MaterialStorage::get_singleton()->shaders.compiler_particles.compile(RS::SHADER_PARTICLES, code, &actions, path, gen_code); + ERR_FAIL_COND_MSG(err != OK, "Shader compilation failed."); + + if (version.is_null()) { + version = MaterialStorage::get_singleton()->shaders.particles_process_shader.version_create(); + } + + for (uint32_t i = 0; i < PARTICLES_MAX_USERDATAS; i++) { + if (userdatas_used[i]) { + userdata_count++; + } + } + + Vector<StringName> texture_uniform_names; + for (int i = 0; i < gen_code.texture_uniforms.size(); i++) { + texture_uniform_names.push_back(gen_code.texture_uniforms[i].name); + } + + MaterialStorage::get_singleton()->shaders.particles_process_shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines, texture_uniform_names); + ERR_FAIL_COND(!MaterialStorage::get_singleton()->shaders.particles_process_shader.version_is_valid(version)); + + ubo_size = gen_code.uniform_total_size; + ubo_offsets = gen_code.uniform_offsets; + texture_uniforms = gen_code.texture_uniforms; + + valid = true; +} + +void ParticlesShaderData::set_default_texture_parameter(const StringName &p_name, RID p_texture, int p_index) { + if (!p_texture.is_valid()) { + if (default_texture_params.has(p_name) && default_texture_params[p_name].has(p_index)) { + default_texture_params[p_name].erase(p_index); + + if (default_texture_params[p_name].is_empty()) { + default_texture_params.erase(p_name); + } + } + } else { + if (!default_texture_params.has(p_name)) { + default_texture_params[p_name] = HashMap<int, RID>(); + } + default_texture_params[p_name][p_index] = p_texture; + } +} + +void ParticlesShaderData::get_shader_uniform_list(List<PropertyInfo> *p_param_list) const { + HashMap<int, StringName> order; + + for (const KeyValue<StringName, ShaderLanguage::ShaderNode::Uniform> &E : uniforms) { + if (E.value.scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL || E.value.scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + if (E.value.texture_order >= 0) { + order[E.value.texture_order + 100000] = E.key; + } else { + order[E.value.order] = E.key; + } + } + + String last_group; + for (const KeyValue<int, StringName> &E : order) { + String group = uniforms[E.value].group; + if (!uniforms[E.value].subgroup.is_empty()) { + group += "::" + uniforms[E.value].subgroup; + } + + if (group != last_group) { + PropertyInfo pi; + pi.usage = PROPERTY_USAGE_GROUP; + pi.name = group; + p_param_list->push_back(pi); + + last_group = group; + } + + PropertyInfo pi = ShaderLanguage::uniform_to_property_info(uniforms[E.value]); + pi.name = E.value; + p_param_list->push_back(pi); + } +} + +void ParticlesShaderData::get_instance_param_list(List<RendererMaterialStorage::InstanceShaderParam> *p_param_list) const { + for (const KeyValue<StringName, ShaderLanguage::ShaderNode::Uniform> &E : uniforms) { + if (E.value.scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_INSTANCE) { + continue; + } + + RendererMaterialStorage::InstanceShaderParam p; + p.info = ShaderLanguage::uniform_to_property_info(E.value); + p.info.name = E.key; //supply name + p.index = E.value.instance_index; + p.default_value = ShaderLanguage::constant_value_to_variant(E.value.default_value, E.value.type, E.value.array_size, E.value.hint); + p_param_list->push_back(p); + } +} + +bool ParticlesShaderData::is_parameter_texture(const StringName &p_param) const { + if (!uniforms.has(p_param)) { + return false; + } + + return uniforms[p_param].texture_order >= 0; +} + +bool ParticlesShaderData::is_animated() const { + return false; +} + +bool ParticlesShaderData::casts_shadows() const { + return false; +} + +Variant ParticlesShaderData::get_default_parameter(const StringName &p_parameter) const { + if (uniforms.has(p_parameter)) { + ShaderLanguage::ShaderNode::Uniform uniform = uniforms[p_parameter]; + Vector<ShaderLanguage::ConstantNode::Value> default_value = uniform.default_value; + return ShaderLanguage::constant_value_to_variant(default_value, uniform.type, uniform.array_size, uniform.hint); + } + return Variant(); +} + +RS::ShaderNativeSourceCode ParticlesShaderData::get_native_source_code() const { + return MaterialStorage::get_singleton()->shaders.particles_process_shader.version_get_native_source_code(version); +} + +ParticlesShaderData::~ParticlesShaderData() { + if (version.is_valid()) { + MaterialStorage::get_singleton()->shaders.particles_process_shader.version_free(version); + } +} + +GLES3::ShaderData *GLES3::_create_particles_shader_func() { + ParticlesShaderData *shader_data = memnew(ParticlesShaderData); + return shader_data; +} + +void ParticleProcessMaterialData::update_parameters(const HashMap<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { + update_parameters_internal(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size); +} + +ParticleProcessMaterialData::~ParticleProcessMaterialData() { +} + +GLES3::MaterialData *GLES3::_create_particles_material_func(ShaderData *p_shader) { + ParticleProcessMaterialData *material_data = memnew(ParticleProcessMaterialData); + material_data->shader_data = static_cast<ParticlesShaderData *>(p_shader); + //update will happen later anyway so do nothing. + return material_data; +} + +void ParticleProcessMaterialData::bind_uniforms() { + // Bind Material Uniforms + glBindBufferBase(GL_UNIFORM_BUFFER, GLES3::PARTICLES_MATERIAL_UNIFORM_LOCATION, uniform_buffer); + + RID *textures = texture_cache.ptrw(); + ShaderCompiler::GeneratedCode::Texture *texture_uniforms = shader_data->texture_uniforms.ptrw(); + for (int ti = 0; ti < texture_cache.size(); ti++) { + Texture *texture = TextureStorage::get_singleton()->get_texture(textures[ti]); + glActiveTexture(GL_TEXTURE1 + ti); // Start at GL_TEXTURE1 becuase texture slot 0 is reserved for the heightmap texture. + glBindTexture(target_from_type[texture_uniforms[ti].type], texture->tex_id); + + // Set sampler state here as the same texture can be used in multiple places with different flags + // Need to convert sampler state from ShaderLanguage::Texture* to RS::CanvasItemTexture* + RS::CanvasItemTextureFilter filter = RS::CanvasItemTextureFilter((int(texture_uniforms[ti].filter) + 1) % RS::CANVAS_ITEM_TEXTURE_FILTER_MAX); + RS::CanvasItemTextureRepeat repeat = RS::CanvasItemTextureRepeat((int(texture_uniforms[ti].repeat) + 1) % RS::CANVAS_ITEM_TEXTURE_REPEAT_MIRROR); + texture->gl_set_filter(filter); + texture->gl_set_repeat(repeat); + } +} + #endif // !GLES3_ENABLED diff --git a/drivers/gles3/storage/material_storage.h b/drivers/gles3/storage/material_storage.h index 6504c7748c..8ae5e5eb9c 100644 --- a/drivers/gles3/storage/material_storage.h +++ b/drivers/gles3/storage/material_storage.h @@ -44,6 +44,7 @@ #include "../shaders/canvas.glsl.gen.h" #include "../shaders/cubemap_filter.glsl.gen.h" +#include "../shaders/particles.glsl.gen.h" #include "../shaders/scene.glsl.gen.h" #include "../shaders/sky.glsl.gen.h" @@ -53,6 +54,7 @@ namespace GLES3 { struct ShaderData { virtual void set_code(const String &p_Code) = 0; + virtual void set_path_hint(const String &p_hint) = 0; virtual void set_default_texture_parameter(const StringName &p_name, RID p_texture, int p_index) = 0; virtual void get_shader_uniform_list(List<PropertyInfo> *p_param_list) const = 0; @@ -165,6 +167,7 @@ struct CanvasShaderData : public ShaderData { bool uses_time = false; virtual void set_code(const String &p_Code); + virtual void set_path_hint(const String &p_hint); virtual void set_default_texture_parameter(const StringName &p_name, RID p_texture, int p_index); virtual void get_shader_uniform_list(List<PropertyInfo> *p_param_list) const; virtual void get_instance_param_list(List<RendererMaterialStorage::InstanceShaderParam> *p_param_list) const; @@ -216,6 +219,7 @@ struct SkyShaderData : public ShaderData { bool uses_light; virtual void set_code(const String &p_Code); + virtual void set_path_hint(const String &p_hint); virtual void set_default_texture_parameter(const StringName &p_name, RID p_texture, int p_index); virtual void get_shader_uniform_list(List<PropertyInfo> *p_param_list) const; virtual void get_instance_param_list(List<RendererMaterialStorage::InstanceShaderParam> *p_param_list) const; @@ -318,6 +322,8 @@ struct SceneShaderData : public ShaderData { bool uses_depth_texture; bool uses_normal_texture; bool uses_time; + bool uses_vertex_time; + bool uses_fragment_time; bool writes_modelview_or_projection; bool uses_world_coordinates; bool uses_tangent; @@ -337,6 +343,7 @@ struct SceneShaderData : public ShaderData { uint32_t index = 0; virtual void set_code(const String &p_Code); + virtual void set_path_hint(const String &p_hint); virtual void set_default_texture_parameter(const StringName &p_name, RID p_texture, int p_index); virtual void get_shader_uniform_list(List<PropertyInfo> *p_param_list) const; virtual void get_instance_param_list(List<RendererMaterialStorage::InstanceShaderParam> *p_param_list) const; @@ -368,6 +375,62 @@ struct SceneMaterialData : public MaterialData { MaterialData *_create_scene_material_func(ShaderData *p_shader); +/* Particle Shader */ + +enum { + PARTICLES_MAX_USERDATAS = 6 +}; + +struct ParticlesShaderData : public ShaderData { + bool valid = false; + RID version; + bool uses_collision = false; + + HashMap<StringName, ShaderLanguage::ShaderNode::Uniform> uniforms; + Vector<ShaderCompiler::GeneratedCode::Texture> texture_uniforms; + + Vector<uint32_t> ubo_offsets; + uint32_t ubo_size = 0; + + String path; + String code; + HashMap<StringName, HashMap<int, RID>> default_texture_params; + + bool uses_time = false; + + bool userdatas_used[PARTICLES_MAX_USERDATAS] = {}; + uint32_t userdata_count = 0; + + virtual void set_code(const String &p_Code); + virtual void set_path_hint(const String &p_hint); + virtual void set_default_texture_parameter(const StringName &p_name, RID p_texture, int p_index); + virtual void get_shader_uniform_list(List<PropertyInfo> *p_param_list) const; + virtual void get_instance_param_list(List<RendererMaterialStorage::InstanceShaderParam> *p_param_list) const; + virtual bool is_parameter_texture(const StringName &p_param) const; + virtual bool is_animated() const; + virtual bool casts_shadows() const; + virtual Variant get_default_parameter(const StringName &p_parameter) const; + virtual RS::ShaderNativeSourceCode get_native_source_code() const; + + ParticlesShaderData() {} + virtual ~ParticlesShaderData(); +}; + +ShaderData *_create_particles_shader_func(); + +struct ParticleProcessMaterialData : public MaterialData { + ParticlesShaderData *shader_data = nullptr; + RID uniform_set; + + virtual void set_render_priority(int p_priority) {} + virtual void set_next_pass(RID p_pass) {} + virtual void update_parameters(const HashMap<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty); + virtual void bind_uniforms(); + virtual ~ParticleProcessMaterialData(); +}; + +MaterialData *_create_particles_material_func(ShaderData *p_shader); + /* Global shader uniform structs */ struct GlobalShaderUniforms { enum { @@ -504,6 +567,7 @@ public: SkyShaderGLES3 sky_shader; SceneShaderGLES3 scene_shader; CubemapFilterShaderGLES3 cubemap_filter_shader; + ParticlesShaderGLES3 particles_process_shader; ShaderCompiler compiler_canvas; ShaderCompiler compiler_scene; @@ -530,7 +594,7 @@ public: virtual int32_t global_shader_parameters_instance_allocate(RID p_instance) override; virtual void global_shader_parameters_instance_free(RID p_instance) override; - virtual void global_shader_parameters_instance_update(RID p_instance, int p_index, const Variant &p_value) override; + virtual void global_shader_parameters_instance_update(RID p_instance, int p_index, const Variant &p_value, int p_flags_count = 0) override; GLuint global_shader_parameters_get_uniform_buffer() const; diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index 22d84eba93..285f32f1a5 100644 --- a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -44,10 +44,16 @@ MeshStorage *MeshStorage::get_singleton() { MeshStorage::MeshStorage() { singleton = this; + + { + skeleton_shader.shader.initialize(); + skeleton_shader.shader_version = skeleton_shader.shader.version_create(); + } } MeshStorage::~MeshStorage() { singleton = nullptr; + skeleton_shader.shader.version_free(skeleton_shader.shader_version); } /* MESH API */ @@ -87,7 +93,6 @@ void MeshStorage::mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count ERR_FAIL_COND(!mesh); ERR_FAIL_COND(mesh->surface_count > 0); //surfaces already exist - WARN_PRINT_ONCE("blend shapes not supported by GLES3 renderer yet"); mesh->blend_shape_count = p_blend_shape_count; } @@ -111,7 +116,6 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) uint32_t attrib_stride = 0; uint32_t skin_stride = 0; - // TODO: I think this should be <=, but it is copied from RendererRD, will have to verify later for (int i = 0; i < RS::ARRAY_WEIGHTS; i++) { if ((p_surface.format & (1 << i))) { switch (i) { @@ -245,8 +249,77 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) s->aabb = p_surface.aabb; s->bone_aabbs = p_surface.bone_aabbs; //only really useful for returning them. - if (mesh->blend_shape_count > 0) { - //s->blend_shape_buffer = RD::get_singleton()->storage_buffer_create(p_surface.blend_shape_data.size(), p_surface.blend_shape_data); + if (p_surface.skin_data.size() || mesh->blend_shape_count > 0) { + // Size must match the size of the vertex array. + int size = p_surface.vertex_data.size(); + int vertex_size = 0; + int stride = 0; + int normal_offset = 0; + int tangent_offset = 0; + if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { + if (p_surface.format & RS::ARRAY_FLAG_USE_2D_VERTICES) { + vertex_size = 2; + } else { + vertex_size = 3; + } + stride = sizeof(float) * vertex_size; + } + if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { + normal_offset = stride; + stride += sizeof(uint16_t) * 2; + } + if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { + tangent_offset = stride; + stride += sizeof(uint16_t) * 2; + } + + if (mesh->blend_shape_count > 0) { + // Blend shapes are passed as one large array, for OpenGL, we need to split each of them into their own buffer + s->blend_shapes = memnew_arr(Mesh::Surface::BlendShape, mesh->blend_shape_count); + + for (uint32_t i = 0; i < mesh->blend_shape_count; i++) { + glGenVertexArrays(1, &s->blend_shapes[i].vertex_array); + glBindVertexArray(s->blend_shapes[i].vertex_array); + glGenBuffers(1, &s->blend_shapes[i].vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, s->blend_shapes[i].vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, size, p_surface.blend_shape_data.ptr() + i * size, (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW); + + if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { + glEnableVertexAttribArray(RS::ARRAY_VERTEX + 3); + glVertexAttribPointer(RS::ARRAY_VERTEX + 3, vertex_size, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(0)); + } + if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { + glEnableVertexAttribArray(RS::ARRAY_NORMAL + 3); + glVertexAttribPointer(RS::ARRAY_NORMAL + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(normal_offset)); + } + if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { + glEnableVertexAttribArray(RS::ARRAY_TANGENT + 3); + glVertexAttribPointer(RS::ARRAY_TANGENT + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(tangent_offset)); + } + } + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + + // Create a vertex array to use for skeleton/blend shapes. + glGenVertexArrays(1, &s->skeleton_vertex_array); + glBindVertexArray(s->skeleton_vertex_array); + glBindBuffer(GL_ARRAY_BUFFER, s->vertex_buffer); + + if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glVertexAttribPointer(RS::ARRAY_VERTEX, vertex_size, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(0)); + } + if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { + glEnableVertexAttribArray(RS::ARRAY_NORMAL); + glVertexAttribPointer(RS::ARRAY_NORMAL, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(normal_offset)); + } + if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { + glEnableVertexAttribArray(RS::ARRAY_TANGENT); + glVertexAttribPointer(RS::ARRAY_TANGENT, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(tangent_offset)); + } + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); } if (mesh->surface_count == 0) { @@ -309,12 +382,48 @@ RS::BlendShapeMode MeshStorage::mesh_get_blend_shape_mode(RID p_mesh) const { } void MeshStorage::mesh_surface_update_vertex_region(RID p_mesh, int p_surface, int p_offset, const Vector<uint8_t> &p_data) { + Mesh *mesh = mesh_owner.get_or_null(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); + ERR_FAIL_COND(p_data.size() == 0); + + uint64_t data_size = p_data.size(); + ERR_FAIL_COND(p_offset + data_size > mesh->surfaces[p_surface]->vertex_buffer_size); + const uint8_t *r = p_data.ptr(); + + glBindBuffer(GL_ARRAY_BUFFER, mesh->surfaces[p_surface]->vertex_buffer); + glBufferSubData(GL_ARRAY_BUFFER, p_offset, data_size, r); + glBindBuffer(GL_ARRAY_BUFFER, 0); } void MeshStorage::mesh_surface_update_attribute_region(RID p_mesh, int p_surface, int p_offset, const Vector<uint8_t> &p_data) { + Mesh *mesh = mesh_owner.get_or_null(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); + ERR_FAIL_COND(p_data.size() == 0); + + uint64_t data_size = p_data.size(); + ERR_FAIL_COND(p_offset + data_size > mesh->surfaces[p_surface]->attribute_buffer_size); + const uint8_t *r = p_data.ptr(); + + glBindBuffer(GL_ARRAY_BUFFER, mesh->surfaces[p_surface]->attribute_buffer); + glBufferSubData(GL_ARRAY_BUFFER, p_offset, data_size, r); + glBindBuffer(GL_ARRAY_BUFFER, 0); } void MeshStorage::mesh_surface_update_skin_region(RID p_mesh, int p_surface, int p_offset, const Vector<uint8_t> &p_data) { + Mesh *mesh = mesh_owner.get_or_null(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); + ERR_FAIL_COND(p_data.size() == 0); + + uint64_t data_size = p_data.size(); + ERR_FAIL_COND(p_offset + data_size > mesh->surfaces[p_surface]->skin_buffer_size); + const uint8_t *r = p_data.ptr(); + + glBindBuffer(GL_ARRAY_BUFFER, mesh->surfaces[p_surface]->skin_buffer); + glBufferSubData(GL_ARRAY_BUFFER, p_offset, data_size, r); + glBindBuffer(GL_ARRAY_BUFFER, 0); } void MeshStorage::mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material) { @@ -373,7 +482,13 @@ RS::SurfaceData MeshStorage::mesh_get_surface(RID p_mesh, int p_surface) const { } sd.bone_aabbs = s.bone_aabbs; - glBindBuffer(GL_ARRAY_BUFFER, 0); + + if (mesh->blend_shape_count) { + sd.blend_shape_data = Vector<uint8_t>(); + for (uint32_t i = 0; i < mesh->blend_shape_count; i++) { + sd.blend_shape_data.append_array(Utilities::buffer_get_data(GL_ARRAY_BUFFER, s.blend_shapes[i].vertex_buffer, s.vertex_buffer_size)); + } + } return sd; } @@ -569,6 +684,24 @@ void MeshStorage::mesh_clear(RID p_mesh) { memdelete_arr(s.lods); } + if (mesh->blend_shape_count) { + for (uint32_t j = 0; j < mesh->blend_shape_count; j++) { + if (s.blend_shapes[j].vertex_buffer != 0) { + glDeleteBuffers(1, &s.blend_shapes[j].vertex_buffer); + s.blend_shapes[j].vertex_buffer = 0; + } + if (s.blend_shapes[j].vertex_array != 0) { + glDeleteVertexArrays(1, &s.blend_shapes[j].vertex_array); + s.blend_shapes[j].vertex_array = 0; + } + } + memdelete_arr(s.blend_shapes); + } + if (s.skeleton_vertex_array != 0) { + glDeleteVertexArrays(1, &s.skeleton_vertex_array); + s.skeleton_vertex_array = 0; + } + memdelete(mesh->surfaces[i]); } if (mesh->surfaces) { @@ -624,15 +757,15 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V case RS::ARRAY_NORMAL: { attribs[i].offset = vertex_stride; attribs[i].size = 2; - attribs[i].type = GL_UNSIGNED_SHORT; - vertex_stride += sizeof(uint16_t) * 2; + attribs[i].type = (mis ? GL_FLOAT : GL_UNSIGNED_SHORT); + vertex_stride += sizeof(uint16_t) * 2 * (mis ? 2 : 1); attribs[i].normalized = GL_TRUE; } break; case RS::ARRAY_TANGENT: { attribs[i].offset = vertex_stride; attribs[i].size = 2; - attribs[i].type = GL_UNSIGNED_SHORT; - vertex_stride += sizeof(uint16_t) * 2; + attribs[i].type = (mis ? GL_FLOAT : GL_UNSIGNED_SHORT); + vertex_stride += sizeof(uint16_t) * 2 * (mis ? 2 : 1); attribs[i].normalized = GL_TRUE; } break; case RS::ARRAY_COLOR: { @@ -677,7 +810,7 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V attribs[i].offset = skin_stride; attribs[i].size = 4; attribs[i].type = GL_UNSIGNED_SHORT; - attributes_stride += 4 * sizeof(uint16_t); + skin_stride += 4 * sizeof(uint16_t); attribs[i].normalized = GL_FALSE; attribs[i].integer = true; } break; @@ -685,7 +818,7 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V attribs[i].offset = skin_stride; attribs[i].size = 4; attribs[i].type = GL_UNSIGNED_SHORT; - attributes_stride += 4 * sizeof(uint16_t); + skin_stride += 4 * sizeof(uint16_t); attribs[i].normalized = GL_TRUE; } break; } @@ -776,7 +909,7 @@ void MeshStorage::mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int ERR_FAIL_COND(!mi); ERR_FAIL_INDEX(p_shape, (int)mi->blend_weights.size()); mi->blend_weights[p_shape] = p_weight; - mi->weights_dirty = true; + mi->dirty = true; } void MeshStorage::_mesh_instance_clear(MeshInstance *mi) { @@ -788,38 +921,65 @@ void MeshStorage::_mesh_instance_clear(MeshInstance *mi) { } memfree(mi->surfaces[i].versions); } + + if (mi->surfaces[i].vertex_buffers[0] != 0) { + glDeleteBuffers(2, mi->surfaces[i].vertex_buffers); + mi->surfaces[i].vertex_buffers[0] = 0; + mi->surfaces[i].vertex_buffers[1] = 0; + } + if (mi->surfaces[i].vertex_buffer != 0) { glDeleteBuffers(1, &mi->surfaces[i].vertex_buffer); mi->surfaces[i].vertex_buffer = 0; } } mi->surfaces.clear(); - - if (mi->blend_weights_buffer != 0) { - glDeleteBuffers(1, &mi->blend_weights_buffer); - mi->blend_weights_buffer = 0; - } mi->blend_weights.clear(); - mi->weights_dirty = false; mi->skeleton_version = 0; } void MeshStorage::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface) { - if (mesh->blend_shape_count > 0 && mi->blend_weights_buffer == 0) { + if (mesh->blend_shape_count > 0) { mi->blend_weights.resize(mesh->blend_shape_count); for (uint32_t i = 0; i < mi->blend_weights.size(); i++) { - mi->blend_weights[i] = 0; + mi->blend_weights[i] = 0.0; } - // Todo allocate buffer for blend_weights and copy data to it - //mi->blend_weights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * mi->blend_weights.size(), mi->blend_weights.to_byte_array()); - - mi->weights_dirty = true; } MeshInstance::Surface s; - if (mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) { - //surface warrants transform - //s.vertex_buffer = RD::get_singleton()->vertex_buffer_create(mesh->surfaces[p_surface]->vertex_buffer_size, Vector<uint8_t>(), true); + if ((mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) && mesh->surfaces[p_surface]->vertex_buffer_size > 0) { + // Cache surface properties + s.format_cache = mesh->surfaces[p_surface]->format; + if ((s.format_cache & (1 << RS::ARRAY_VERTEX))) { + if (s.format_cache & RS::ARRAY_FLAG_USE_2D_VERTICES) { + s.vertex_size_cache = 2; + } else { + s.vertex_size_cache = 3; + } + s.vertex_stride_cache = sizeof(float) * s.vertex_size_cache; + } + if ((s.format_cache & (1 << RS::ARRAY_NORMAL))) { + s.vertex_normal_offset_cache = s.vertex_stride_cache; + s.vertex_stride_cache += sizeof(uint32_t) * 2; + } + if ((s.format_cache & (1 << RS::ARRAY_TANGENT))) { + s.vertex_tangent_offset_cache = s.vertex_stride_cache; + s.vertex_stride_cache += sizeof(uint32_t) * 2; + } + + // Buffer to be used for rendering. Final output of skeleton and blend shapes. + glGenBuffers(1, &s.vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count, nullptr, GL_DYNAMIC_DRAW); + if (mesh->blend_shape_count > 0) { + // Ping-Pong buffers for processing blendshapes. + glGenBuffers(2, s.vertex_buffers); + for (uint32_t i = 0; i < 2; i++) { + glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffers[i]); + glBufferData(GL_ARRAY_BUFFER, s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count, nullptr, GL_DYNAMIC_DRAW); + } + } + glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind } mi->surfaces.push_back(s); @@ -831,11 +991,6 @@ void MeshStorage::mesh_instance_check_for_update(RID p_mesh_instance) { bool needs_update = mi->dirty; - if (mi->weights_dirty && !mi->weight_update_list.in_list()) { - dirty_mesh_instance_weights.add(&mi->weight_update_list); - needs_update = true; - } - if (mi->array_update_list.in_list()) { return; } @@ -852,22 +1007,223 @@ void MeshStorage::mesh_instance_check_for_update(RID p_mesh_instance) { } } -void MeshStorage::update_mesh_instances() { - while (dirty_mesh_instance_weights.first()) { - MeshInstance *mi = dirty_mesh_instance_weights.first()->self(); +void MeshStorage::_blend_shape_bind_mesh_instance_buffer(MeshInstance *p_mi, uint32_t p_surface) { + glBindBuffer(GL_ARRAY_BUFFER, p_mi->surfaces[p_surface].vertex_buffers[0]); - if (mi->blend_weights_buffer != 0) { - //RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr()); - } - dirty_mesh_instance_weights.remove(&mi->weight_update_list); - mi->weights_dirty = false; + if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_VERTEX))) { + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glVertexAttribPointer(RS::ARRAY_VERTEX, p_mi->surfaces[p_surface].vertex_size_cache, GL_FLOAT, GL_FALSE, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(0)); + } else { + glDisableVertexAttribArray(RS::ARRAY_VERTEX); + } + if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_NORMAL))) { + glEnableVertexAttribArray(RS::ARRAY_NORMAL); + glVertexAttribIPointer(RS::ARRAY_NORMAL, 2, GL_UNSIGNED_INT, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(p_mi->surfaces[p_surface].vertex_normal_offset_cache)); + } else { + glDisableVertexAttribArray(RS::ARRAY_NORMAL); + } + if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_TANGENT))) { + glEnableVertexAttribArray(RS::ARRAY_TANGENT); + glVertexAttribIPointer(RS::ARRAY_TANGENT, 2, GL_UNSIGNED_INT, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(p_mi->surfaces[p_surface].vertex_tangent_offset_cache)); + } else { + glDisableVertexAttribArray(RS::ARRAY_TANGENT); + } +} + +void MeshStorage::_compute_skeleton(MeshInstance *p_mi, Skeleton *p_sk, uint32_t p_surface) { + glBindBuffer(GL_ARRAY_BUFFER, 0); + + // Add in the bones and weights. + glBindBuffer(GL_ARRAY_BUFFER, p_mi->mesh->surfaces[p_surface]->skin_buffer); + + bool use_8_weights = p_mi->surfaces[p_surface].format_cache & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS; + int skin_stride = sizeof(int16_t) * (use_8_weights ? 16 : 8); + glEnableVertexAttribArray(RS::ARRAY_BONES); + glVertexAttribIPointer(RS::ARRAY_BONES, 4, GL_UNSIGNED_SHORT, skin_stride, CAST_INT_TO_UCHAR_PTR(0)); + if (use_8_weights) { + glEnableVertexAttribArray(11); + glVertexAttribIPointer(11, 4, GL_UNSIGNED_SHORT, skin_stride, CAST_INT_TO_UCHAR_PTR(4 * sizeof(uint16_t))); + glEnableVertexAttribArray(12); + glVertexAttribPointer(12, 4, GL_UNSIGNED_SHORT, GL_TRUE, skin_stride, CAST_INT_TO_UCHAR_PTR(8 * sizeof(uint16_t))); + glEnableVertexAttribArray(13); + glVertexAttribPointer(13, 4, GL_UNSIGNED_SHORT, GL_TRUE, skin_stride, CAST_INT_TO_UCHAR_PTR(12 * sizeof(uint16_t))); + } else { + glEnableVertexAttribArray(RS::ARRAY_WEIGHTS); + glVertexAttribPointer(RS::ARRAY_WEIGHTS, 4, GL_UNSIGNED_SHORT, GL_TRUE, skin_stride, CAST_INT_TO_UCHAR_PTR(4 * sizeof(uint16_t))); } + + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, p_mi->surfaces[p_surface].vertex_buffer); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, p_sk->transforms_texture); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, p_mi->mesh->surfaces[p_surface]->vertex_count); + glEndTransformFeedback(); + + glDisableVertexAttribArray(RS::ARRAY_BONES); + glDisableVertexAttribArray(RS::ARRAY_WEIGHTS); + glDisableVertexAttribArray(RS::ARRAY_BONES + 2); + glDisableVertexAttribArray(RS::ARRAY_WEIGHTS + 2); + glBindVertexArray(0); + glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); +} + +void MeshStorage::update_mesh_instances() { if (dirty_mesh_instance_arrays.first() == nullptr) { return; //nothing to do } + glEnable(GL_RASTERIZER_DISCARD); // Process skeletons and blend shapes using transform feedback - // TODO: Implement when working on skeletons and blend shapes + while (dirty_mesh_instance_arrays.first()) { + MeshInstance *mi = dirty_mesh_instance_arrays.first()->self(); + + Skeleton *sk = skeleton_owner.get_or_null(mi->skeleton); + + // Precompute base weight if using blend shapes. + float base_weight = 1.0; + if (mi->mesh->blend_shape_count && mi->mesh->blend_shape_mode == RS::BLEND_SHAPE_MODE_NORMALIZED) { + for (uint32_t i = 0; i < mi->mesh->blend_shape_count; i++) { + base_weight -= mi->blend_weights[i]; + } + } + + for (uint32_t i = 0; i < mi->surfaces.size(); i++) { + if (mi->surfaces[i].vertex_buffer == 0 || mi->mesh->surfaces[i]->skeleton_vertex_array == 0) { + continue; + } + + bool array_is_2d = mi->surfaces[i].format_cache & RS::ARRAY_FLAG_USE_2D_VERTICES; + bool can_use_skeleton = sk != nullptr && sk->use_2d == array_is_2d && (mi->surfaces[i].format_cache & RS::ARRAY_FORMAT_BONES); + bool use_8_weights = mi->surfaces[i].format_cache & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS; + + // Always process blend shapes first. + if (mi->mesh->blend_shape_count) { + SkeletonShaderGLES3::ShaderVariant variant = SkeletonShaderGLES3::MODE_BASE_PASS; + uint64_t specialization = 0; + specialization |= array_is_2d ? SkeletonShaderGLES3::MODE_2D : 0; + specialization |= SkeletonShaderGLES3::USE_BLEND_SHAPES; + if (!array_is_2d) { + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_NORMAL))) { + specialization |= SkeletonShaderGLES3::USE_NORMAL; + } + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_TANGENT))) { + specialization |= SkeletonShaderGLES3::USE_TANGENT; + } + } + + bool success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_WEIGHT, base_weight, skeleton_shader.shader_version, variant, specialization); + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(mi->mesh->surfaces[i]->skeleton_vertex_array); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[0]); + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); + glEndTransformFeedback(); + + variant = SkeletonShaderGLES3::MODE_BLEND_PASS; + success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + //Do the last blend shape separately, as it can be combined with the skeleton pass. + for (uint32_t bs = 0; bs < mi->mesh->blend_shape_count - 1; bs++) { + float weight = mi->blend_weights[bs]; + + if (Math::is_zero_approx(weight)) { + //not bother with this one + continue; + } + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_WEIGHT, weight, skeleton_shader.shader_version, variant, specialization); + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); + + glBindVertexArray(mi->mesh->surfaces[i]->blend_shapes[bs].vertex_array); + _blend_shape_bind_mesh_instance_buffer(mi, i); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[1]); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); + glEndTransformFeedback(); + + SWAP(mi->surfaces[i].vertex_buffers[0], mi->surfaces[i].vertex_buffers[1]); + } + uint32_t bs = mi->mesh->blend_shape_count - 1; + + float weight = mi->blend_weights[bs]; + + glBindVertexArray(mi->mesh->surfaces[i]->blend_shapes[bs].vertex_array); + _blend_shape_bind_mesh_instance_buffer(mi, i); + + specialization |= can_use_skeleton ? SkeletonShaderGLES3::USE_SKELETON : 0; + specialization |= (can_use_skeleton && use_8_weights) ? SkeletonShaderGLES3::USE_EIGHT_WEIGHTS : 0; + specialization |= SkeletonShaderGLES3::FINAL_PASS; + success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_WEIGHT, weight, skeleton_shader.shader_version, variant, specialization); + skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); + + if (can_use_skeleton) { + // Do last blendshape in the same pass as the Skeleton. + _compute_skeleton(mi, sk, i); + can_use_skeleton = false; + } else { + // Do last blendshape by itself and prepare vertex data for use by the renderer. + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffer); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); + glEndTransformFeedback(); + } + + glBindVertexArray(0); + glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); + } + + // This branch should only execute when Skeleton is run by itself. + if (can_use_skeleton) { + SkeletonShaderGLES3::ShaderVariant variant = SkeletonShaderGLES3::MODE_BASE_PASS; + uint64_t specialization = 0; + specialization |= array_is_2d ? SkeletonShaderGLES3::MODE_2D : 0; + specialization |= SkeletonShaderGLES3::USE_SKELETON; + specialization |= SkeletonShaderGLES3::FINAL_PASS; + specialization |= use_8_weights ? SkeletonShaderGLES3::USE_EIGHT_WEIGHTS : 0; + if (!array_is_2d) { + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_NORMAL))) { + specialization |= SkeletonShaderGLES3::USE_NORMAL; + } + if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_TANGENT))) { + specialization |= SkeletonShaderGLES3::USE_TANGENT; + } + } + + bool success = skeleton_shader.shader.version_bind_shader(skeleton_shader.shader_version, variant, specialization); + if (!success) { + continue; + } + + glBindVertexArray(mi->mesh->surfaces[i]->skeleton_vertex_array); + _compute_skeleton(mi, sk, i); + } + } + mi->dirty = false; + if (sk) { + mi->skeleton_version = sk->version; + } + dirty_mesh_instance_arrays.remove(&mi->array_update_list); + } + glDisable(GL_RASTERIZER_DISCARD); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0); } /* MULTIMESH API */ @@ -968,7 +1324,7 @@ void MeshStorage::multimesh_set_mesh(RID p_multimesh, RID p_mesh) { #define MULTIMESH_DIRTY_REGION_SIZE 512 void MeshStorage::_multimesh_make_local(MultiMesh *multimesh) const { - if (multimesh->data_cache.size() > 0) { + if (multimesh->data_cache.size() > 0 || multimesh->instances == 0) { return; //already local } ERR_FAIL_COND(multimesh->data_cache.size() > 0); @@ -1385,7 +1741,7 @@ Vector<float> MeshStorage::multimesh_get_buffer(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_COND_V(!multimesh, Vector<float>()); Vector<float> ret; - if (multimesh->buffer == 0) { + if (multimesh->buffer == 0 || multimesh->instances == 0) { return Vector<float>(); } else if (multimesh->data_cache.size()) { ret = multimesh->data_cache; @@ -1538,45 +1894,207 @@ void MeshStorage::_update_dirty_multimeshes() { /* SKELETON API */ RID MeshStorage::skeleton_allocate() { - return RID(); + return skeleton_owner.allocate_rid(); } void MeshStorage::skeleton_initialize(RID p_rid) { + skeleton_owner.initialize_rid(p_rid, Skeleton()); } void MeshStorage::skeleton_free(RID p_rid) { + _update_dirty_skeletons(); + skeleton_allocate_data(p_rid, 0); + Skeleton *skeleton = skeleton_owner.get_or_null(p_rid); + skeleton->dependency.deleted_notify(p_rid); + skeleton_owner.free(p_rid); +} + +void MeshStorage::_skeleton_make_dirty(Skeleton *skeleton) { + if (!skeleton->dirty) { + skeleton->dirty = true; + skeleton->dirty_list = skeleton_dirty_list; + skeleton_dirty_list = skeleton; + } } void MeshStorage::skeleton_allocate_data(RID p_skeleton, int p_bones, bool p_2d_skeleton) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + ERR_FAIL_COND(!skeleton); + ERR_FAIL_COND(p_bones < 0); + + if (skeleton->size == p_bones && skeleton->use_2d == p_2d_skeleton) { + return; + } + + skeleton->size = p_bones; + skeleton->use_2d = p_2d_skeleton; + skeleton->height = (p_bones * (p_2d_skeleton ? 2 : 3)) / 256; + if ((p_bones * (p_2d_skeleton ? 2 : 3)) % 256) { + skeleton->height++; + } + + if (skeleton->transforms_texture != 0) { + glDeleteTextures(1, &skeleton->transforms_texture); + skeleton->transforms_texture = 0; + skeleton->data.clear(); + } + + if (skeleton->size) { + skeleton->data.resize(256 * skeleton->height * 4); + glGenTextures(1, &skeleton->transforms_texture); + glBindTexture(GL_TEXTURE_2D, skeleton->transforms_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 256, skeleton->height, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glBindTexture(GL_TEXTURE_2D, 0); + + memset(skeleton->data.ptrw(), 0, skeleton->data.size() * sizeof(float)); + + _skeleton_make_dirty(skeleton); + } + + skeleton->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_SKELETON_DATA); } void MeshStorage::skeleton_set_base_transform_2d(RID p_skeleton, const Transform2D &p_base_transform) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_NULL(skeleton); + ERR_FAIL_COND(!skeleton->use_2d); + + skeleton->base_transform_2d = p_base_transform; } int MeshStorage::skeleton_get_bone_count(RID p_skeleton) const { - return 0; + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + ERR_FAIL_COND_V(!skeleton, 0); + + return skeleton->size; } void MeshStorage::skeleton_bone_set_transform(RID p_skeleton, int p_bone, const Transform3D &p_transform) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND(!skeleton); + ERR_FAIL_INDEX(p_bone, skeleton->size); + ERR_FAIL_COND(skeleton->use_2d); + + float *dataptr = skeleton->data.ptrw() + p_bone * 12; + + dataptr[0] = p_transform.basis.rows[0][0]; + dataptr[1] = p_transform.basis.rows[0][1]; + dataptr[2] = p_transform.basis.rows[0][2]; + dataptr[3] = p_transform.origin.x; + dataptr[4] = p_transform.basis.rows[1][0]; + dataptr[5] = p_transform.basis.rows[1][1]; + dataptr[6] = p_transform.basis.rows[1][2]; + dataptr[7] = p_transform.origin.y; + dataptr[8] = p_transform.basis.rows[2][0]; + dataptr[9] = p_transform.basis.rows[2][1]; + dataptr[10] = p_transform.basis.rows[2][2]; + dataptr[11] = p_transform.origin.z; + + _skeleton_make_dirty(skeleton); } Transform3D MeshStorage::skeleton_bone_get_transform(RID p_skeleton, int p_bone) const { - return Transform3D(); + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND_V(!skeleton, Transform3D()); + ERR_FAIL_INDEX_V(p_bone, skeleton->size, Transform3D()); + ERR_FAIL_COND_V(skeleton->use_2d, Transform3D()); + + const float *dataptr = skeleton->data.ptr() + p_bone * 12; + + Transform3D t; + + t.basis.rows[0][0] = dataptr[0]; + t.basis.rows[0][1] = dataptr[1]; + t.basis.rows[0][2] = dataptr[2]; + t.origin.x = dataptr[3]; + t.basis.rows[1][0] = dataptr[4]; + t.basis.rows[1][1] = dataptr[5]; + t.basis.rows[1][2] = dataptr[6]; + t.origin.y = dataptr[7]; + t.basis.rows[2][0] = dataptr[8]; + t.basis.rows[2][1] = dataptr[9]; + t.basis.rows[2][2] = dataptr[10]; + t.origin.z = dataptr[11]; + + return t; } void MeshStorage::skeleton_bone_set_transform_2d(RID p_skeleton, int p_bone, const Transform2D &p_transform) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND(!skeleton); + ERR_FAIL_INDEX(p_bone, skeleton->size); + ERR_FAIL_COND(!skeleton->use_2d); + + float *dataptr = skeleton->data.ptrw() + p_bone * 8; + + dataptr[0] = p_transform.columns[0][0]; + dataptr[1] = p_transform.columns[1][0]; + dataptr[2] = 0; + dataptr[3] = p_transform.columns[2][0]; + dataptr[4] = p_transform.columns[0][1]; + dataptr[5] = p_transform.columns[1][1]; + dataptr[6] = 0; + dataptr[7] = p_transform.columns[2][1]; + + _skeleton_make_dirty(skeleton); } Transform2D MeshStorage::skeleton_bone_get_transform_2d(RID p_skeleton, int p_bone) const { - return Transform2D(); + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + + ERR_FAIL_COND_V(!skeleton, Transform2D()); + ERR_FAIL_INDEX_V(p_bone, skeleton->size, Transform2D()); + ERR_FAIL_COND_V(!skeleton->use_2d, Transform2D()); + + const float *dataptr = skeleton->data.ptr() + p_bone * 8; + + Transform2D t; + t.columns[0][0] = dataptr[0]; + t.columns[1][0] = dataptr[1]; + t.columns[2][0] = dataptr[3]; + t.columns[0][1] = dataptr[4]; + t.columns[1][1] = dataptr[5]; + t.columns[2][1] = dataptr[7]; + + return t; } -void MeshStorage::skeleton_update_dependency(RID p_base, DependencyTracker *p_instance) { +void MeshStorage::_update_dirty_skeletons() { + while (skeleton_dirty_list) { + Skeleton *skeleton = skeleton_dirty_list; + + if (skeleton->size) { + glBindTexture(GL_TEXTURE_2D, skeleton->transforms_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 256, skeleton->height, 0, GL_RGBA, GL_FLOAT, skeleton->data.ptr()); + glBindTexture(GL_TEXTURE_2D, 0); + } + + skeleton_dirty_list = skeleton->dirty_list; + + skeleton->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_SKELETON_BONES); + + skeleton->version++; + + skeleton->dirty = false; + skeleton->dirty_list = nullptr; + } + + skeleton_dirty_list = nullptr; } -/* OCCLUDER */ +void MeshStorage::skeleton_update_dependency(RID p_skeleton, DependencyTracker *p_instance) { + Skeleton *skeleton = skeleton_owner.get_or_null(p_skeleton); + ERR_FAIL_COND(!skeleton); -void MeshStorage::occluder_set_mesh(RID p_occluder, const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices) { + p_instance->update_dependency(&skeleton->dependency); } #endif // GLES3_ENABLED diff --git a/drivers/gles3/storage/mesh_storage.h b/drivers/gles3/storage/mesh_storage.h index 74f5800795..0f30814928 100644 --- a/drivers/gles3/storage/mesh_storage.h +++ b/drivers/gles3/storage/mesh_storage.h @@ -33,6 +33,7 @@ #ifdef GLES3_ENABLED +#include "../shaders/skeleton.glsl.gen.h" #include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" #include "core/templates/self_list.h" @@ -102,7 +103,13 @@ struct Mesh { Vector<AABB> bone_aabbs; - GLuint blend_shape_buffer = 0; + struct BlendShape { + GLuint vertex_buffer = 0; + GLuint vertex_array = 0; + }; + + BlendShape *blend_shapes = nullptr; + GLuint skeleton_vertex_array = 0; RID material; }; @@ -136,7 +143,14 @@ struct MeshInstance { Mesh *mesh = nullptr; RID skeleton; struct Surface { + GLuint vertex_buffers[2] = { 0, 0 }; + GLuint vertex_arrays[2] = { 0, 0 }; GLuint vertex_buffer = 0; + int vertex_stride_cache = 0; + int vertex_size_cache = 0; + int vertex_normal_offset_cache = 0; + int vertex_tangent_offset_cache = 0; + uint32_t format_cache = 0; Mesh::Surface::Version *versions = nullptr; //allocated on demand uint32_t version_count = 0; @@ -144,7 +158,6 @@ struct MeshInstance { LocalVector<Surface> surfaces; LocalVector<float> blend_weights; - GLuint blend_weights_buffer = 0; List<MeshInstance *>::Element *I = nullptr; //used to erase itself uint64_t skeleton_version = 0; bool dirty = false; @@ -186,13 +199,15 @@ struct MultiMesh { struct Skeleton { bool use_2d = false; int size = 0; + int height = 0; Vector<float> data; - GLuint buffer = 0; bool dirty = false; Skeleton *dirty_list = nullptr; Transform2D base_transform_2d; + GLuint transforms_texture = 0; + uint64_t version = 1; Dependency dependency; @@ -202,6 +217,11 @@ class MeshStorage : public RendererMeshStorage { private: static MeshStorage *singleton; + struct { + SkeletonShaderGLES3 shader; + RID shader_version; + } skeleton_shader; + /* Mesh */ mutable RID_Owner<Mesh, true> mesh_owner; @@ -214,6 +234,7 @@ private: void _mesh_instance_clear(MeshInstance *mi); void _mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface); + void _blend_shape_bind_mesh_instance_buffer(MeshInstance *p_mi, uint32_t p_surface); SelfList<MeshInstance>::List dirty_mesh_instance_weights; SelfList<MeshInstance>::List dirty_mesh_instance_arrays; @@ -232,9 +253,10 @@ private: mutable RID_Owner<Skeleton, true> skeleton_owner; - Skeleton *skeleton_dirty_list = nullptr; - _FORCE_INLINE_ void _skeleton_make_dirty(Skeleton *skeleton); + void _compute_skeleton(MeshInstance *p_mi, Skeleton *p_sk, uint32_t p_surface); + + Skeleton *skeleton_dirty_list = nullptr; public: static MeshStorage *get_singleton(); @@ -325,13 +347,13 @@ public: return s->index_count ? s->index_count : s->vertex_count; } - _FORCE_INLINE_ uint32_t mesh_surface_get_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_mesh_lod_threshold, uint32_t *r_index_count = nullptr) const { + _FORCE_INLINE_ uint32_t mesh_surface_get_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_mesh_lod_threshold, uint32_t &r_index_count) const { Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + ERR_FAIL_COND_V(!s, 0); int32_t current_lod = -1; - if (r_index_count) { - *r_index_count = s->index_count; - } + r_index_count = s->index_count; + for (uint32_t i = 0; i < s->lod_count; i++) { float screen_size = s->lods[i].edge_length * p_model_scale / p_distance_threshold; if (screen_size > p_mesh_lod_threshold) { @@ -342,9 +364,7 @@ public: if (current_lod == -1) { return 0; } else { - if (r_index_count) { - *r_index_count = s->lods[current_lod].index_count; - } + r_index_count = s->lods[current_lod].index_count; return current_lod + 1; } } @@ -406,6 +426,8 @@ public: virtual void mesh_instance_check_for_update(RID p_mesh_instance) override; virtual void update_mesh_instances() override; + // TODO: considering hashing versions with multimesh buffer RID. + // Doing so would allow us to avoid specifying multimesh buffer pointers every frame and may improve performance. _FORCE_INLINE_ void mesh_instance_surface_get_vertex_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint32_t p_input_mask, GLuint &r_vertex_array_gl) { MeshInstance *mi = mesh_instance_owner.get_or_null(p_mesh_instance); ERR_FAIL_COND(!mi); @@ -534,9 +556,11 @@ public: virtual void skeleton_update_dependency(RID p_base, DependencyTracker *p_instance) override; - /* OCCLUDER */ + void _update_dirty_skeletons(); - void occluder_set_mesh(RID p_occluder, const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices); + _FORCE_INLINE_ bool skeleton_is_valid(RID p_skeleton) { + return skeleton_owner.get_or_null(p_skeleton) != nullptr; + } }; } // namespace GLES3 diff --git a/drivers/gles3/storage/particles_storage.cpp b/drivers/gles3/storage/particles_storage.cpp index 9ed9fedd5a..1a0d97df01 100644 --- a/drivers/gles3/storage/particles_storage.cpp +++ b/drivers/gles3/storage/particles_storage.cpp @@ -31,6 +31,12 @@ #ifdef GLES3_ENABLED #include "particles_storage.h" +#include "material_storage.h" +#include "mesh_storage.h" +#include "texture_storage.h" +#include "utilities.h" + +#include "servers/rendering/rendering_server_default.h" using namespace GLES3; @@ -42,213 +48,1338 @@ ParticlesStorage *ParticlesStorage::get_singleton() { ParticlesStorage::ParticlesStorage() { singleton = this; + GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); + + { + String global_defines; + global_defines += "#define MAX_GLOBAL_SHADER_UNIFORMS 256\n"; // TODO: this is arbitrary for now + material_storage->shaders.particles_process_shader.initialize(global_defines); + } + { + // default material and shader for particles shader + particles_shader.default_shader = material_storage->shader_allocate(); + material_storage->shader_initialize(particles_shader.default_shader); + material_storage->shader_set_code(particles_shader.default_shader, R"( +// Default particles shader. + +shader_type particles; + +void process() { + COLOR = vec4(1.0); +} +)"); + particles_shader.default_material = material_storage->material_allocate(); + material_storage->material_initialize(particles_shader.default_material); + material_storage->material_set_shader(particles_shader.default_material, particles_shader.default_shader); + } + { + particles_shader.copy_shader.initialize(); + particles_shader.copy_shader_version = particles_shader.copy_shader.version_create(); + } } ParticlesStorage::~ParticlesStorage() { singleton = nullptr; + GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); + + material_storage->material_free(particles_shader.default_material); + material_storage->shader_free(particles_shader.default_shader); + particles_shader.copy_shader.version_free(particles_shader.copy_shader_version); } /* PARTICLES */ RID ParticlesStorage::particles_allocate() { - return RID(); + return particles_owner.allocate_rid(); } void ParticlesStorage::particles_initialize(RID p_rid) { + particles_owner.initialize_rid(p_rid, Particles()); } void ParticlesStorage::particles_free(RID p_rid) { + update_particles(); + Particles *particles = particles_owner.get_or_null(p_rid); + particles->dependency.deleted_notify(p_rid); + _particles_free_data(particles); + particles_owner.free(p_rid); } void ParticlesStorage::particles_set_mode(RID p_particles, RS::ParticlesMode p_mode) { -} + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + if (particles->mode == p_mode) { + return; + } -void ParticlesStorage::particles_emit(RID p_particles, const Transform3D &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags) { + _particles_free_data(particles); + + particles->mode = p_mode; } void ParticlesStorage::particles_set_emitting(RID p_particles, bool p_emitting) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->emitting = p_emitting; +} + +bool ParticlesStorage::particles_get_emitting(RID p_particles) { + ERR_FAIL_COND_V_MSG(RSG::threaded, false, "This function should never be used with threaded rendering, as it stalls the renderer."); + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, false); + + return particles->emitting; +} + +void ParticlesStorage::_particles_free_data(Particles *particles) { + particles->userdata_count = 0; + particles->instance_buffer_size_cache = 0; + particles->instance_buffer_stride_cache = 0; + particles->num_attrib_arrays_cache = 0; + particles->process_buffer_stride_cache = 0; + + if (particles->front_process_buffer != 0) { + glDeleteVertexArrays(1, &particles->front_vertex_array); + glDeleteBuffers(1, &particles->front_process_buffer); + glDeleteBuffers(1, &particles->front_instance_buffer); + particles->front_vertex_array = 0; + particles->front_process_buffer = 0; + particles->front_instance_buffer = 0; + + glDeleteVertexArrays(1, &particles->back_vertex_array); + glDeleteBuffers(1, &particles->back_process_buffer); + glDeleteBuffers(1, &particles->back_instance_buffer); + particles->back_vertex_array = 0; + particles->back_process_buffer = 0; + particles->back_instance_buffer = 0; + } + + if (particles->sort_buffer != 0) { + glDeleteBuffers(1, &particles->last_frame_buffer); + glDeleteBuffers(1, &particles->sort_buffer); + particles->last_frame_buffer = 0; + particles->sort_buffer = 0; + particles->sort_buffer_filled = false; + particles->last_frame_buffer_filled = false; + } + + if (particles->frame_params_ubo != 0) { + glDeleteBuffers(1, &particles->frame_params_ubo); + particles->frame_params_ubo = 0; + } } void ParticlesStorage::particles_set_amount(RID p_particles, int p_amount) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + if (particles->amount == p_amount) { + return; + } + + _particles_free_data(particles); + + particles->amount = p_amount; + + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + + particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_PARTICLES); } void ParticlesStorage::particles_set_lifetime(RID p_particles, double p_lifetime) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->lifetime = p_lifetime; } void ParticlesStorage::particles_set_one_shot(RID p_particles, bool p_one_shot) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->one_shot = p_one_shot; } void ParticlesStorage::particles_set_pre_process_time(RID p_particles, double p_time) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->pre_process_time = p_time; } - void ParticlesStorage::particles_set_explosiveness_ratio(RID p_particles, real_t p_ratio) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->explosiveness = p_ratio; } - void ParticlesStorage::particles_set_randomness_ratio(RID p_particles, real_t p_ratio) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->randomness = p_ratio; } void ParticlesStorage::particles_set_custom_aabb(RID p_particles, const AABB &p_aabb) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->custom_aabb = p_aabb; + particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); } void ParticlesStorage::particles_set_speed_scale(RID p_particles, double p_scale) { -} - -void ParticlesStorage::particles_set_use_local_coordinates(RID p_particles, bool p_enable) { -} + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); -void ParticlesStorage::particles_set_process_material(RID p_particles, RID p_material) { + particles->speed_scale = p_scale; } +void ParticlesStorage::particles_set_use_local_coordinates(RID p_particles, bool p_enable) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); -RID ParticlesStorage::particles_get_process_material(RID p_particles) const { - return RID(); + particles->use_local_coords = p_enable; + particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_PARTICLES); } void ParticlesStorage::particles_set_fixed_fps(RID p_particles, int p_fps) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->fixed_fps = p_fps; + + _particles_free_data(particles); + + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + + particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_PARTICLES); } void ParticlesStorage::particles_set_interpolate(RID p_particles, bool p_enable) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->interpolate = p_enable; } void ParticlesStorage::particles_set_fractional_delta(RID p_particles, bool p_enable) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->fractional_delta = p_enable; } -void ParticlesStorage::particles_set_subemitter(RID p_particles, RID p_subemitter_particles) { +void ParticlesStorage::particles_set_trails(RID p_particles, bool p_enable, double p_length) { + if (p_enable) { + WARN_PRINT_ONCE("The OpenGL 3 renderer does not support particle trails"); + } } -void ParticlesStorage::particles_set_view_axis(RID p_particles, const Vector3 &p_axis, const Vector3 &p_up_axis) { +void ParticlesStorage::particles_set_trail_bind_poses(RID p_particles, const Vector<Transform3D> &p_bind_poses) { + if (p_bind_poses.size() != 0) { + WARN_PRINT_ONCE("The OpenGL 3 renderer does not support particle trails"); + } } void ParticlesStorage::particles_set_collision_base_size(RID p_particles, real_t p_size) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->collision_base_size = p_size; } void ParticlesStorage::particles_set_transform_align(RID p_particles, RS::ParticlesTransformAlign p_transform_align) { -} + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); -void ParticlesStorage::particles_set_trails(RID p_particles, bool p_enable, double p_length) { + particles->transform_align = p_transform_align; } -void ParticlesStorage::particles_set_trail_bind_poses(RID p_particles, const Vector<Transform3D> &p_bind_poses) { +void ParticlesStorage::particles_set_process_material(RID p_particles, RID p_material) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->process_material = p_material; + particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_PARTICLES); //the instance buffer may have changed } -void ParticlesStorage::particles_restart(RID p_particles) { +RID ParticlesStorage::particles_get_process_material(RID p_particles) const { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, RID()); + + return particles->process_material; } void ParticlesStorage::particles_set_draw_order(RID p_particles, RS::ParticlesDrawOrder p_order) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->draw_order = p_order; } -void ParticlesStorage::particles_set_draw_passes(RID p_particles, int p_count) { +void ParticlesStorage::particles_set_draw_passes(RID p_particles, int p_passes) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->draw_passes.resize(p_passes); } void ParticlesStorage::particles_set_draw_pass_mesh(RID p_particles, int p_pass, RID p_mesh) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + ERR_FAIL_INDEX(p_pass, particles->draw_passes.size()); + particles->draw_passes.write[p_pass] = p_mesh; + particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_PARTICLES); +} + +void ParticlesStorage::particles_restart(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + particles->restart_request = true; +} + +void ParticlesStorage::particles_set_subemitter(RID p_particles, RID p_subemitter_particles) { + if (p_subemitter_particles.is_valid()) { + WARN_PRINT_ONCE("The OpenGL 3 renderer does not support particle sub emitters"); + } +} + +void ParticlesStorage::particles_emit(RID p_particles, const Transform3D &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags) { + WARN_PRINT_ONCE("The OpenGL 3 renderer does not support manually emitting particles"); } void ParticlesStorage::particles_request_process(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + if (!particles->dirty) { + particles->dirty = true; + particles->update_list = particle_update_list; + particle_update_list = particles; + } } AABB ParticlesStorage::particles_get_current_aabb(RID p_particles) { - return AABB(); + if (RSG::threaded) { + WARN_PRINT_ONCE("Calling this function with threaded rendering enabled stalls the renderer, use with care."); + } + + const Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, AABB()); + + int total_amount = particles->amount; + + // If available, read from the sort buffer which should be 2 frames out of date. + // This will help alleviate GPU stalls. + GLuint read_buffer = particles->sort_buffer_filled ? particles->sort_buffer : particles->back_instance_buffer; + + Vector<uint8_t> buffer = Utilities::buffer_get_data(GL_ARRAY_BUFFER, read_buffer, total_amount * sizeof(ParticleInstanceData3D)); + ERR_FAIL_COND_V(buffer.size() != (int)(total_amount * sizeof(ParticleInstanceData3D)), AABB()); + + Transform3D inv = particles->emission_transform.affine_inverse(); + + AABB aabb; + if (buffer.size()) { + bool first = true; + + const uint8_t *data_ptr = (const uint8_t *)buffer.ptr(); + uint32_t particle_data_size = sizeof(ParticleInstanceData3D) + sizeof(float) * particles->userdata_count; + + for (int i = 0; i < total_amount; i++) { + const ParticleInstanceData3D &particle_data = *(const ParticleInstanceData3D *)&data_ptr[particle_data_size * i]; + // If scale is 0.0, we assume the particle is inactive. + if (particle_data.xform[0] > 0.0) { + Vector3 pos = Vector3(particle_data.xform[3], particle_data.xform[7], particle_data.xform[11]); + if (!particles->use_local_coords) { + pos = inv.xform(pos); + } + if (first) { + aabb.position = pos; + first = false; + } else { + aabb.expand_to(pos); + } + } + } + } + + float longest_axis_size = 0; + for (int i = 0; i < particles->draw_passes.size(); i++) { + if (particles->draw_passes[i].is_valid()) { + AABB maabb = MeshStorage::get_singleton()->mesh_get_aabb(particles->draw_passes[i], RID()); + longest_axis_size = MAX(maabb.get_longest_axis_size(), longest_axis_size); + } + } + + aabb.grow_by(longest_axis_size); + + return aabb; } AABB ParticlesStorage::particles_get_aabb(RID p_particles) const { - return AABB(); + const Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, AABB()); + + return particles->custom_aabb; } void ParticlesStorage::particles_set_emission_transform(RID p_particles, const Transform3D &p_transform) { -} + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); -bool ParticlesStorage::particles_get_emitting(RID p_particles) { - return false; + particles->emission_transform = p_transform; } int ParticlesStorage::particles_get_draw_passes(RID p_particles) const { - return 0; -} + const Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, 0); -RID ParticlesStorage::particles_get_draw_pass_mesh(RID p_particles, int p_pass) const { - return RID(); + return particles->draw_passes.size(); } -void ParticlesStorage::particles_add_collision(RID p_particles, RID p_instance) { +RID ParticlesStorage::particles_get_draw_pass_mesh(RID p_particles, int p_pass) const { + const Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, RID()); + ERR_FAIL_INDEX_V(p_pass, particles->draw_passes.size(), RID()); + + return particles->draw_passes[p_pass]; } -void ParticlesStorage::particles_remove_collision(RID p_particles, RID p_instance) { +void ParticlesStorage::particles_add_collision(RID p_particles, RID p_particles_collision_instance) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->collisions.insert(p_particles_collision_instance); +} + +void ParticlesStorage::particles_remove_collision(RID p_particles, RID p_particles_collision_instance) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->collisions.erase(p_particles_collision_instance); +} + +void ParticlesStorage::particles_set_canvas_sdf_collision(RID p_particles, bool p_enable, const Transform2D &p_xform, const Rect2 &p_to_screen, GLuint p_texture) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + particles->has_sdf_collision = p_enable; + particles->sdf_collision_transform = p_xform; + particles->sdf_collision_to_screen = p_to_screen; + particles->sdf_collision_texture = p_texture; +} + +// Does one step of processing particles by reading from back_process_buffer and writing to front_process_buffer. +void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta) { + GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); + GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); + + double new_phase = Math::fmod(p_particles->phase + (p_delta / p_particles->lifetime) * p_particles->speed_scale, 1.0); + + //update current frame + ParticlesFrameParams frame_params; + + if (p_particles->clear) { + p_particles->cycle_number = 0; + p_particles->random_seed = Math::rand(); + } else if (new_phase < p_particles->phase) { + if (p_particles->one_shot) { + p_particles->emitting = false; + } + p_particles->cycle_number++; + } + + frame_params.emitting = p_particles->emitting; + frame_params.system_phase = new_phase; + frame_params.prev_system_phase = p_particles->phase; + + p_particles->phase = new_phase; + + frame_params.time = RSG::rasterizer->get_total_time(); + frame_params.delta = p_delta * p_particles->speed_scale; + frame_params.random_seed = p_particles->random_seed; + frame_params.explosiveness = p_particles->explosiveness; + frame_params.randomness = p_particles->randomness; + + if (p_particles->use_local_coords) { + GLES3::MaterialStorage::store_transform(Transform3D(), frame_params.emission_transform); + } else { + GLES3::MaterialStorage::store_transform(p_particles->emission_transform, frame_params.emission_transform); + } + + frame_params.cycle = p_particles->cycle_number; + frame_params.frame = p_particles->frame_counter++; + frame_params.pad0 = 0; + frame_params.pad1 = 0; + frame_params.pad2 = 0; + + { //collision and attractors + + frame_params.collider_count = 0; + frame_params.attractor_count = 0; + frame_params.particle_size = p_particles->collision_base_size; + + GLuint collision_heightmap_texture = 0; + + Transform3D to_particles; + if (p_particles->use_local_coords) { + to_particles = p_particles->emission_transform.affine_inverse(); + } + + if (p_particles->has_sdf_collision && p_particles->sdf_collision_texture != 0) { + //2D collision + + Transform2D xform = p_particles->sdf_collision_transform; //will use dotproduct manually so invert beforehand + Transform2D revert = xform.affine_inverse(); + frame_params.collider_count = 1; + frame_params.colliders[0].transform[0] = xform.columns[0][0]; + frame_params.colliders[0].transform[1] = xform.columns[0][1]; + frame_params.colliders[0].transform[2] = 0; + frame_params.colliders[0].transform[3] = xform.columns[2][0]; + + frame_params.colliders[0].transform[4] = xform.columns[1][0]; + frame_params.colliders[0].transform[5] = xform.columns[1][1]; + frame_params.colliders[0].transform[6] = 0; + frame_params.colliders[0].transform[7] = xform.columns[2][1]; + + frame_params.colliders[0].transform[8] = revert.columns[0][0]; + frame_params.colliders[0].transform[9] = revert.columns[0][1]; + frame_params.colliders[0].transform[10] = 0; + frame_params.colliders[0].transform[11] = revert.columns[2][0]; + + frame_params.colliders[0].transform[12] = revert.columns[1][0]; + frame_params.colliders[0].transform[13] = revert.columns[1][1]; + frame_params.colliders[0].transform[14] = 0; + frame_params.colliders[0].transform[15] = revert.columns[2][1]; + + frame_params.colliders[0].extents[0] = p_particles->sdf_collision_to_screen.size.x; + frame_params.colliders[0].extents[1] = p_particles->sdf_collision_to_screen.size.y; + frame_params.colliders[0].extents[2] = p_particles->sdf_collision_to_screen.position.x; + frame_params.colliders[0].scale = p_particles->sdf_collision_to_screen.position.y; + frame_params.colliders[0].type = ParticlesFrameParams::COLLISION_TYPE_2D_SDF; + + collision_heightmap_texture = p_particles->sdf_collision_texture; + } + + for (const RID &E : p_particles->collisions) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.get_or_null(E); + if (!pci || !pci->active) { + continue; + } + ParticlesCollision *pc = particles_collision_owner.get_or_null(pci->collision); + ERR_CONTINUE(!pc); + + Transform3D to_collider = pci->transform; + if (p_particles->use_local_coords) { + to_collider = to_particles * to_collider; + } + Vector3 scale = to_collider.basis.get_scale(); + to_collider.basis.orthonormalize(); + + if (pc->type <= RS::PARTICLES_COLLISION_TYPE_VECTOR_FIELD_ATTRACT) { + //attractor + if (frame_params.attractor_count >= ParticlesFrameParams::MAX_ATTRACTORS) { + continue; + } + + ParticlesFrameParams::Attractor &attr = frame_params.attractors[frame_params.attractor_count]; + + GLES3::MaterialStorage::store_transform(to_collider, attr.transform); + attr.strength = pc->attractor_strength; + attr.attenuation = pc->attractor_attenuation; + attr.directionality = pc->attractor_directionality; + + switch (pc->type) { + case RS::PARTICLES_COLLISION_TYPE_SPHERE_ATTRACT: { + attr.type = ParticlesFrameParams::ATTRACTOR_TYPE_SPHERE; + float radius = pc->radius; + radius *= (scale.x + scale.y + scale.z) / 3.0; + attr.extents[0] = radius; + attr.extents[1] = radius; + attr.extents[2] = radius; + } break; + case RS::PARTICLES_COLLISION_TYPE_BOX_ATTRACT: { + attr.type = ParticlesFrameParams::ATTRACTOR_TYPE_BOX; + Vector3 extents = pc->extents * scale; + attr.extents[0] = extents.x; + attr.extents[1] = extents.y; + attr.extents[2] = extents.z; + } break; + case RS::PARTICLES_COLLISION_TYPE_VECTOR_FIELD_ATTRACT: { + WARN_PRINT_ONCE("Vector field particle attractors are not available in the OpenGL2 renderer."); + } break; + default: { + } + } + + frame_params.attractor_count++; + } else { + //collider + if (frame_params.collider_count >= ParticlesFrameParams::MAX_COLLIDERS) { + continue; + } + + ParticlesFrameParams::Collider &col = frame_params.colliders[frame_params.collider_count]; + + GLES3::MaterialStorage::store_transform(to_collider, col.transform); + switch (pc->type) { + case RS::PARTICLES_COLLISION_TYPE_SPHERE_COLLIDE: { + col.type = ParticlesFrameParams::COLLISION_TYPE_SPHERE; + float radius = pc->radius; + radius *= (scale.x + scale.y + scale.z) / 3.0; + col.extents[0] = radius; + col.extents[1] = radius; + col.extents[2] = radius; + } break; + case RS::PARTICLES_COLLISION_TYPE_BOX_COLLIDE: { + col.type = ParticlesFrameParams::COLLISION_TYPE_BOX; + Vector3 extents = pc->extents * scale; + col.extents[0] = extents.x; + col.extents[1] = extents.y; + col.extents[2] = extents.z; + } break; + case RS::PARTICLES_COLLISION_TYPE_SDF_COLLIDE: { + WARN_PRINT_ONCE("SDF Particle Colliders are not available in the OpenGL 3 renderer."); + } break; + case RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE: { + if (collision_heightmap_texture != 0) { //already taken + continue; + } + + col.type = ParticlesFrameParams::COLLISION_TYPE_HEIGHT_FIELD; + Vector3 extents = pc->extents * scale; + col.extents[0] = extents.x; + col.extents[1] = extents.y; + col.extents[2] = extents.z; + collision_heightmap_texture = pc->heightfield_texture; + } break; + default: { + } + } + + frame_params.collider_count++; + } + } + + // Bind heightmap or SDF texture. + GLuint heightmap = collision_heightmap_texture; + if (heightmap == 0) { + GLES3::Texture *tex = texture_storage->get_texture(texture_storage->texture_gl_get_default(GLES3::DEFAULT_GL_TEXTURE_BLACK)); + heightmap = tex->tex_id; + } + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, heightmap); + } + + if (p_particles->frame_params_ubo == 0) { + glGenBuffers(1, &p_particles->frame_params_ubo); + } + // Update per-frame UBO. + glBindBufferBase(GL_UNIFORM_BUFFER, PARTICLES_FRAME_UNIFORM_LOCATION, p_particles->frame_params_ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(ParticlesFrameParams), &frame_params, GL_STREAM_DRAW); + + // Get shader and set shader uniforms; + ParticleProcessMaterialData *m = static_cast<ParticleProcessMaterialData *>(material_storage->material_get_data(p_particles->process_material, RS::SHADER_PARTICLES)); + if (!m) { + m = static_cast<ParticleProcessMaterialData *>(material_storage->material_get_data(particles_shader.default_material, RS::SHADER_PARTICLES)); + } + + ERR_FAIL_COND(!m); + + ParticlesShaderGLES3::ShaderVariant variant = ParticlesShaderGLES3::MODE_DEFAULT; + + uint32_t specialization = 0; + for (uint32_t i = 0; i < p_particles->userdata_count; i++) { + specialization |= (1 << i); + } + + if (p_particles->mode == RS::ParticlesMode::PARTICLES_MODE_3D) { + specialization |= ParticlesShaderGLES3::MODE_3D; + } + + RID version = particles_shader.default_shader_version; + if (m->shader_data->version.is_valid() && m->shader_data->valid) { + // Bind material uniform buffer and textures. + m->bind_uniforms(); + version = m->shader_data->version; + } + + bool success = material_storage->shaders.particles_process_shader.version_bind_shader(version, variant, specialization); + if (!success) { + return; + } + + material_storage->shaders.particles_process_shader.version_set_uniform(ParticlesShaderGLES3::LIFETIME, p_particles->lifetime, version, variant, specialization); + material_storage->shaders.particles_process_shader.version_set_uniform(ParticlesShaderGLES3::CLEAR, p_particles->clear, version, variant, specialization); + material_storage->shaders.particles_process_shader.version_set_uniform(ParticlesShaderGLES3::TOTAL_PARTICLES, uint32_t(p_particles->amount), version, variant, specialization); + material_storage->shaders.particles_process_shader.version_set_uniform(ParticlesShaderGLES3::USE_FRACTIONAL_DELTA, p_particles->fractional_delta, version, variant, specialization); + + p_particles->clear = false; + + p_particles->has_collision_cache = m->shader_data->uses_collision; + + glBindVertexArray(p_particles->back_vertex_array); + + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, p_particles->front_process_buffer); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, p_particles->amount); + glEndTransformFeedback(); + + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0); + glBindVertexArray(0); + + SWAP(p_particles->front_process_buffer, p_particles->back_process_buffer); + SWAP(p_particles->front_vertex_array, p_particles->back_vertex_array); } -void ParticlesStorage::particles_set_canvas_sdf_collision(RID p_particles, bool p_enable, const Transform2D &p_xform, const Rect2 &p_to_screen, RID p_texture) { +void ParticlesStorage::particles_set_view_axis(RID p_particles, const Vector3 &p_axis, const Vector3 &p_up_axis) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND(!particles); + + if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY) { + return; + } + + if (particles->front_process_buffer == 0) { + return; //particles have not processed yet + } + + Vector3 axis = -p_axis; // cameras look to z negative + + if (particles->use_local_coords) { + axis = particles->emission_transform.basis.xform_inv(axis).normalized(); + } + + // Sort will be done on CPU since we don't have compute shaders. + // If the sort_buffer has valid data + // Use a buffer that is 2 frames out of date to avoid stalls. + if (particles->draw_order == RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH && particles->sort_buffer_filled) { + glBindBuffer(GL_ARRAY_BUFFER, particles->sort_buffer); + + ParticleInstanceData3D *particle_array; +#ifndef __EMSCRIPTEN__ + particle_array = static_cast<ParticleInstanceData3D *>(glMapBufferRange(GL_ARRAY_BUFFER, 0, particles->amount * sizeof(ParticleInstanceData3D), GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)); + ERR_FAIL_NULL(particle_array); +#else + LocalVector<ParticleInstanceData3D> particle_vector; + particle_vector.resize(particles->amount); + particle_array = particle_vector.ptr(); + glGetBufferSubData(GL_ARRAY_BUFFER, 0, particles->amount * sizeof(ParticleInstanceData3D), particle_array); +#endif + SortArray<ParticleInstanceData3D, ParticlesViewSort> sorter; + sorter.compare.z_dir = axis; + sorter.sort(particle_array, particles->amount); + +#ifndef __EMSCRIPTEN__ + glUnmapBuffer(GL_ARRAY_BUFFER); +#else + glBufferSubData(GL_ARRAY_BUFFER, 0, particles->amount * sizeof(ParticleInstanceData3D), particle_vector.ptr()); +#endif + } + + glEnable(GL_RASTERIZER_DISCARD); + _particles_update_instance_buffer(particles, axis, p_up_axis); + glDisable(GL_RASTERIZER_DISCARD); +} + +void ParticlesStorage::_particles_update_buffers(Particles *particles) { + GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); + uint32_t userdata_count = 0; + + if (particles->process_material.is_valid()) { + GLES3::ParticleProcessMaterialData *material_data = static_cast<GLES3::ParticleProcessMaterialData *>(material_storage->material_get_data(particles->process_material, RS::SHADER_PARTICLES)); + if (material_data && material_data->shader_data->version.is_valid() && material_data->shader_data->valid) { + userdata_count = material_data->shader_data->userdata_count; + } + } + + if (userdata_count != particles->userdata_count) { + // Mismatch userdata, re-create buffers. + _particles_free_data(particles); + } + + if (particles->amount > 0 && particles->front_process_buffer == 0) { + int total_amount = particles->amount; + + particles->userdata_count = userdata_count; + + uint32_t xform_size = particles->mode == RS::PARTICLES_MODE_2D ? 2 : 3; + particles->instance_buffer_stride_cache = sizeof(float) * 4 * (xform_size + 1); + particles->instance_buffer_size_cache = particles->instance_buffer_stride_cache * total_amount; + particles->num_attrib_arrays_cache = 5 + userdata_count + (xform_size - 2); + particles->process_buffer_stride_cache = sizeof(float) * 4 * particles->num_attrib_arrays_cache; + + int process_data_amount = 4 * particles->num_attrib_arrays_cache * total_amount; + float *data = memnew_arr(float, process_data_amount); + + for (int i = 0; i < process_data_amount; i++) { + data[i] = 0; + } + + { + glGenVertexArrays(1, &particles->front_vertex_array); + glBindVertexArray(particles->front_vertex_array); + glGenBuffers(1, &particles->front_process_buffer); + glGenBuffers(1, &particles->front_instance_buffer); + + glBindBuffer(GL_ARRAY_BUFFER, particles->front_process_buffer); + glBufferData(GL_ARRAY_BUFFER, particles->process_buffer_stride_cache * total_amount, data, GL_DYNAMIC_COPY); + + for (uint32_t j = 0; j < particles->num_attrib_arrays_cache; j++) { + glEnableVertexAttribArray(j); + glVertexAttribPointer(j, 4, GL_FLOAT, GL_FALSE, particles->process_buffer_stride_cache, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * j)); + } + glBindVertexArray(0); + + glBindBuffer(GL_ARRAY_BUFFER, particles->front_instance_buffer); + glBufferData(GL_ARRAY_BUFFER, particles->instance_buffer_size_cache, nullptr, GL_DYNAMIC_COPY); + } + + { + glGenVertexArrays(1, &particles->back_vertex_array); + glBindVertexArray(particles->back_vertex_array); + glGenBuffers(1, &particles->back_process_buffer); + glGenBuffers(1, &particles->back_instance_buffer); + + glBindBuffer(GL_ARRAY_BUFFER, particles->back_process_buffer); + glBufferData(GL_ARRAY_BUFFER, particles->process_buffer_stride_cache * total_amount, data, GL_DYNAMIC_COPY); + + for (uint32_t j = 0; j < particles->num_attrib_arrays_cache; j++) { + glEnableVertexAttribArray(j); + glVertexAttribPointer(j, 4, GL_FLOAT, GL_FALSE, particles->process_buffer_stride_cache, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * j)); + } + glBindVertexArray(0); + + glBindBuffer(GL_ARRAY_BUFFER, particles->back_instance_buffer); + glBufferData(GL_ARRAY_BUFFER, particles->instance_buffer_size_cache, nullptr, GL_DYNAMIC_COPY); + } + glBindBuffer(GL_ARRAY_BUFFER, 0); + + memdelete_arr(data); + } +} + +void ParticlesStorage::_particles_allocate_history_buffers(Particles *particles) { + if (particles->sort_buffer == 0) { + glGenBuffers(1, &particles->last_frame_buffer); + glBindBuffer(GL_ARRAY_BUFFER, particles->last_frame_buffer); + glBufferData(GL_ARRAY_BUFFER, particles->instance_buffer_size_cache, nullptr, GL_DYNAMIC_READ); + + glGenBuffers(1, &particles->sort_buffer); + glBindBuffer(GL_ARRAY_BUFFER, particles->sort_buffer); + glBufferData(GL_ARRAY_BUFFER, particles->instance_buffer_size_cache, nullptr, GL_DYNAMIC_READ); + particles->sort_buffer_filled = false; + particles->last_frame_buffer_filled = false; + glBindBuffer(GL_ARRAY_BUFFER, 0); + } +} +void ParticlesStorage::_particles_update_instance_buffer(Particles *particles, const Vector3 &p_axis, const Vector3 &p_up_axis) { + ParticlesCopyShaderGLES3::ShaderVariant variant = ParticlesCopyShaderGLES3::MODE_DEFAULT; + + uint64_t specialization = 0; + if (particles->mode == RS::ParticlesMode::PARTICLES_MODE_3D) { + specialization |= ParticlesCopyShaderGLES3::MODE_3D; + } + + bool success = particles_shader.copy_shader.version_bind_shader(particles_shader.copy_shader_version, variant, specialization); + if (!success) { + return; + } + + // Affect 2D only. + if (particles->use_local_coords) { + // In local mode, particle positions are calculated locally (relative to the node position) + // and they're also drawn locally. + // It works as expected, so we just pass an identity transform. + particles_shader.copy_shader.version_set_uniform(ParticlesCopyShaderGLES3::INV_EMISSION_TRANSFORM, Transform3D(), particles_shader.copy_shader_version, variant, specialization); + } else { + // In global mode, particle positions are calculated globally (relative to the canvas origin) + // but they're drawn locally. + // So, we need to pass the inverse of the emission transform to bring the + // particles to local coordinates before drawing. + Transform3D inv = particles->emission_transform.affine_inverse(); + particles_shader.copy_shader.version_set_uniform(ParticlesCopyShaderGLES3::INV_EMISSION_TRANSFORM, inv, particles_shader.copy_shader_version, variant, specialization); + } + + particles_shader.copy_shader.version_set_uniform(ParticlesCopyShaderGLES3::FRAME_REMAINDER, particles->interpolate ? particles->frame_remainder : 0.0, particles_shader.copy_shader_version, variant, specialization); + particles_shader.copy_shader.version_set_uniform(ParticlesCopyShaderGLES3::ALIGN_MODE, uint32_t(particles->transform_align), particles_shader.copy_shader_version, variant, specialization); + particles_shader.copy_shader.version_set_uniform(ParticlesCopyShaderGLES3::ALIGN_UP, p_up_axis, particles_shader.copy_shader_version, variant, specialization); + particles_shader.copy_shader.version_set_uniform(ParticlesCopyShaderGLES3::SORT_DIRECTION, p_axis, particles_shader.copy_shader_version, variant, specialization); + + glBindVertexArray(particles->back_vertex_array); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, particles->front_instance_buffer, 0, particles->instance_buffer_size_cache); + glBeginTransformFeedback(GL_POINTS); + + if (particles->draw_order == RS::PARTICLES_DRAW_ORDER_LIFETIME) { + uint32_t lifetime_split = MIN(particles->amount * particles->phase, particles->amount - 1); + uint32_t stride = particles->process_buffer_stride_cache; + + glBindBuffer(GL_ARRAY_BUFFER, particles->back_process_buffer); + + // Offset VBO so you render starting at the newest particle. + if (particles->amount - lifetime_split > 0) { + glEnableVertexAttribArray(0); // Color. + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * lifetime_split + sizeof(float) * 4 * 0)); + glEnableVertexAttribArray(1); // .xyz: velocity. .z: flags. + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * lifetime_split + sizeof(float) * 4 * 1)); + glEnableVertexAttribArray(2); // Custom. + glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * lifetime_split + sizeof(float) * 4 * 2)); + glEnableVertexAttribArray(3); // Xform1. + glVertexAttribPointer(3, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * lifetime_split + sizeof(float) * 4 * 3)); + glEnableVertexAttribArray(4); // Xform2. + glVertexAttribPointer(4, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * lifetime_split + sizeof(float) * 4 * 4)); + if (particles->mode == RS::PARTICLES_MODE_3D) { + glEnableVertexAttribArray(5); // Xform3. + glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(stride * lifetime_split + sizeof(float) * 4 * 5)); + } + + uint32_t to_draw = particles->amount - lifetime_split; + glDrawArrays(GL_POINTS, 0, to_draw); + } + + // Then render from index 0 up intil the newest particle. + if (lifetime_split > 0) { + glEndTransformFeedback(); + // Now output to the second portion of the instance buffer. + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, particles->front_instance_buffer, particles->instance_buffer_stride_cache * (particles->amount - lifetime_split), particles->instance_buffer_stride_cache * (lifetime_split)); + glBeginTransformFeedback(GL_POINTS); + // Reset back to normal. + for (uint32_t j = 0; j < particles->num_attrib_arrays_cache; j++) { + glEnableVertexAttribArray(j); + glVertexAttribPointer(j, 4, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4 * j)); + } + + glDrawArrays(GL_POINTS, 0, lifetime_split); + } + } else { + glDrawArrays(GL_POINTS, 0, particles->amount); + } + + glEndTransformFeedback(); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0, 0, 0); + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); } void ParticlesStorage::update_particles() { + glEnable(GL_RASTERIZER_DISCARD); + + GLuint global_buffer = GLES3::MaterialStorage::get_singleton()->global_shader_parameters_get_uniform_buffer(); + + glBindBufferBase(GL_UNIFORM_BUFFER, PARTICLES_GLOBALS_UNIFORM_LOCATION, global_buffer); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + while (particle_update_list) { + // Use transform feedback to process particles. + + Particles *particles = particle_update_list; + + particle_update_list = particles->update_list; + particles->update_list = nullptr; + particles->dirty = false; + + _particles_update_buffers(particles); + + if (particles->restart_request) { + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + particles->restart_request = false; + } + + if (particles->inactive && !particles->emitting) { + //go next + continue; + } + + if (particles->emitting) { + if (particles->inactive) { + //restart system from scratch + particles->prev_ticks = 0; + particles->phase = 0; + particles->prev_phase = 0; + particles->clear = true; + } + particles->inactive = false; + particles->inactive_time = 0; + } else { + particles->inactive_time += particles->speed_scale * RSG::rasterizer->get_frame_delta_time(); + if (particles->inactive_time > particles->lifetime * 1.2) { + particles->inactive = true; + continue; + } + } + + // Copy the instance buffer that was last used into the last_frame buffer. + // sort_buffer should now be 2 frames out of date. + if (particles->draw_order == RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH || particles->draw_order == RS::PARTICLES_DRAW_ORDER_REVERSE_LIFETIME) { + _particles_allocate_history_buffers(particles); + SWAP(particles->last_frame_buffer, particles->sort_buffer); + + glBindBuffer(GL_COPY_READ_BUFFER, particles->back_instance_buffer); + glBindBuffer(GL_COPY_WRITE_BUFFER, particles->last_frame_buffer); + glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0, particles->instance_buffer_size_cache); + + // Last frame's last_frame turned into this frame's sort buffer. + particles->sort_buffer_filled = particles->last_frame_buffer_filled; + particles->sort_buffer_phase = particles->last_frame_phase; + particles->last_frame_buffer_filled = true; + particles->last_frame_phase = particles->phase; + glBindBuffer(GL_COPY_READ_BUFFER, 0); + glBindBuffer(GL_COPY_WRITE_BUFFER, 0); + } + + int fixed_fps = 0; + if (particles->fixed_fps > 0) { + fixed_fps = particles->fixed_fps; + } + + bool zero_time_scale = Engine::get_singleton()->get_time_scale() <= 0.0; + + if (particles->clear && particles->pre_process_time > 0.0) { + double frame_time; + if (fixed_fps > 0) { + frame_time = 1.0 / fixed_fps; + } else { + frame_time = 1.0 / 30.0; + } + + double todo = particles->pre_process_time; + + while (todo >= 0) { + _particles_process(particles, frame_time); + todo -= frame_time; + } + } + + if (fixed_fps > 0) { + double frame_time; + double decr; + if (zero_time_scale) { + frame_time = 0.0; + decr = 1.0 / fixed_fps; + } else { + frame_time = 1.0 / fixed_fps; + decr = frame_time; + } + double delta = RSG::rasterizer->get_frame_delta_time(); + if (delta > 0.1) { //avoid recursive stalls if fps goes below 10 + delta = 0.1; + } else if (delta <= 0.0) { //unlikely but.. + delta = 0.001; + } + double todo = particles->frame_remainder + delta; + + while (todo >= frame_time) { + _particles_process(particles, frame_time); + todo -= decr; + } + + particles->frame_remainder = todo; + + } else { + if (zero_time_scale) { + _particles_process(particles, 0.0); + } else { + _particles_process(particles, RSG::rasterizer->get_frame_delta_time()); + } + } + + // Copy particles to instance buffer and pack Color/Custom. + // We don't have camera information here, so don't copy here if we need camera information for view depth or align mode. + if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY) { + _particles_update_instance_buffer(particles, Vector3(0.0, 0.0, 0.0), Vector3(0.0, 0.0, 0.0)); + + if (particles->draw_order == RS::PARTICLES_DRAW_ORDER_REVERSE_LIFETIME && particles->sort_buffer_filled) { + if (particles->mode == RS::ParticlesMode::PARTICLES_MODE_2D) { + _particles_reverse_lifetime_sort<ParticleInstanceData2D>(particles); + } else { + _particles_reverse_lifetime_sort<ParticleInstanceData3D>(particles); + } + } + } + + SWAP(particles->front_instance_buffer, particles->back_instance_buffer); + + // At the end of update, the back_buffer contains the most up-to-date-information to read from. + + particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); + } + + glDisable(GL_RASTERIZER_DISCARD); +} + +template <typename ParticleInstanceData> +void ParticlesStorage::_particles_reverse_lifetime_sort(Particles *particles) { + glBindBuffer(GL_ARRAY_BUFFER, particles->sort_buffer); + + ParticleInstanceData *particle_array; + uint32_t buffer_size = particles->amount * sizeof(ParticleInstanceData); +#ifndef __EMSCRIPTEN__ + particle_array = static_cast<ParticleInstanceData *>(glMapBufferRange(GL_ARRAY_BUFFER, 0, buffer_size, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)); + + ERR_FAIL_NULL(particle_array); +#else + LocalVector<ParticleInstanceData> particle_vector; + particle_vector.resize(particles->amount); + particle_array = particle_vector.ptr(); + glGetBufferSubData(GL_ARRAY_BUFFER, 0, buffer_size, particle_array); +#endif + + uint32_t lifetime_split = MIN(particles->amount * particles->sort_buffer_phase, particles->amount - 1); + + for (uint32_t i = 0; i < lifetime_split / 2; i++) { + SWAP(particle_array[i], particle_array[lifetime_split - i]); + } + + for (uint32_t i = 0; i < (particles->amount - lifetime_split) / 2; i++) { + SWAP(particle_array[lifetime_split + i + 1], particle_array[particles->amount - 1 - i]); + } + +#ifndef __EMSCRIPTEN__ + glUnmapBuffer(GL_ARRAY_BUFFER); +#else + glBufferSubData(GL_ARRAY_BUFFER, 0, buffer_size, particle_vector.ptr()); +#endif + glBindBuffer(GL_ARRAY_BUFFER, 0); +} + +Dependency *ParticlesStorage::particles_get_dependency(RID p_particles) const { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_NULL_V(particles, nullptr); + + return &particles->dependency; } bool ParticlesStorage::particles_is_inactive(RID p_particles) const { - return false; + ERR_FAIL_COND_V_MSG(RSG::threaded, false, "This function should never be used with threaded rendering, as it stalls the renderer."); + const Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, false); + return !particles->emitting && particles->inactive; } -/* PARTICLES COLLISION */ +/* PARTICLES COLLISION API */ RID ParticlesStorage::particles_collision_allocate() { - return RID(); + return particles_collision_owner.allocate_rid(); } - void ParticlesStorage::particles_collision_initialize(RID p_rid) { + particles_collision_owner.initialize_rid(p_rid, ParticlesCollision()); } void ParticlesStorage::particles_collision_free(RID p_rid) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_rid); + + if (particles_collision->heightfield_texture != 0) { + glDeleteTextures(1, &particles_collision->heightfield_texture); + particles_collision->heightfield_texture = 0; + glDeleteFramebuffers(1, &particles_collision->heightfield_fb); + particles_collision->heightfield_fb = 0; + } + particles_collision->dependency.deleted_notify(p_rid); + particles_collision_owner.free(p_rid); +} + +GLuint ParticlesStorage::particles_collision_get_heightfield_framebuffer(RID p_particles_collision) const { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, 0); + ERR_FAIL_COND_V(particles_collision->type != RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE, 0); + + if (particles_collision->heightfield_texture == 0) { + //create + const int resolutions[RS::PARTICLES_COLLISION_HEIGHTFIELD_RESOLUTION_MAX] = { 256, 512, 1024, 2048, 4096, 8192 }; + Size2i size; + if (particles_collision->extents.x > particles_collision->extents.z) { + size.x = resolutions[particles_collision->heightfield_resolution]; + size.y = int32_t(particles_collision->extents.z / particles_collision->extents.x * size.x); + } else { + size.y = resolutions[particles_collision->heightfield_resolution]; + size.x = int32_t(particles_collision->extents.x / particles_collision->extents.z * size.y); + } + + glGenTextures(1, &particles_collision->heightfield_texture); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, particles_collision->heightfield_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT32F, size.x, size.y, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + + glGenFramebuffers(1, &particles_collision->heightfield_fb); + glBindFramebuffer(GL_FRAMEBUFFER, particles_collision->heightfield_fb); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, particles_collision->heightfield_texture, 0); +#ifdef DEBUG_ENABLED + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + WARN_PRINT("Could create heightmap texture status: " + GLES3::TextureStorage::get_singleton()->get_framebuffer_error(status)); + } +#endif + particles_collision->heightfield_fb_size = size; + + glBindTexture(GL_TEXTURE_2D, 0); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + } + + return particles_collision->heightfield_fb; } void ParticlesStorage::particles_collision_set_collision_type(RID p_particles_collision, RS::ParticlesCollisionType p_type) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + if (p_type == particles_collision->type) { + return; + } + + if (particles_collision->heightfield_texture != 0) { + glDeleteTextures(1, &particles_collision->heightfield_texture); + particles_collision->heightfield_texture = 0; + glDeleteFramebuffers(1, &particles_collision->heightfield_fb); + particles_collision->heightfield_fb = 0; + } + + particles_collision->type = p_type; + particles_collision->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); } void ParticlesStorage::particles_collision_set_cull_mask(RID p_particles_collision, uint32_t p_cull_mask) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + particles_collision->cull_mask = p_cull_mask; } void ParticlesStorage::particles_collision_set_sphere_radius(RID p_particles_collision, real_t p_radius) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->radius = p_radius; + particles_collision->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); } void ParticlesStorage::particles_collision_set_box_extents(RID p_particles_collision, const Vector3 &p_extents) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->extents = p_extents; + particles_collision->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); } void ParticlesStorage::particles_collision_set_attractor_strength(RID p_particles_collision, real_t p_strength) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->attractor_strength = p_strength; } void ParticlesStorage::particles_collision_set_attractor_directionality(RID p_particles_collision, real_t p_directionality) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->attractor_directionality = p_directionality; } void ParticlesStorage::particles_collision_set_attractor_attenuation(RID p_particles_collision, real_t p_curve) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + + particles_collision->attractor_attenuation = p_curve; } void ParticlesStorage::particles_collision_set_field_texture(RID p_particles_collision, RID p_texture) { + WARN_PRINT_ONCE("The OpenGL 3 renderer does not support SDF collisions in 3D particle shaders"); } void ParticlesStorage::particles_collision_height_field_update(RID p_particles_collision) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + particles_collision->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); } void ParticlesStorage::particles_collision_set_height_field_resolution(RID p_particles_collision, RS::ParticlesCollisionHeightfieldResolution p_resolution) { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND(!particles_collision); + ERR_FAIL_INDEX(p_resolution, RS::PARTICLES_COLLISION_HEIGHTFIELD_RESOLUTION_MAX); + + if (particles_collision->heightfield_resolution == p_resolution) { + return; + } + + particles_collision->heightfield_resolution = p_resolution; + + if (particles_collision->heightfield_texture != 0) { + glDeleteTextures(1, &particles_collision->heightfield_texture); + particles_collision->heightfield_texture = 0; + glDeleteFramebuffers(1, &particles_collision->heightfield_fb); + particles_collision->heightfield_fb = 0; + } } AABB ParticlesStorage::particles_collision_get_aabb(RID p_particles_collision) const { - return AABB(); + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, AABB()); + + switch (particles_collision->type) { + case RS::PARTICLES_COLLISION_TYPE_SPHERE_ATTRACT: + case RS::PARTICLES_COLLISION_TYPE_SPHERE_COLLIDE: { + AABB aabb; + aabb.position = -Vector3(1, 1, 1) * particles_collision->radius; + aabb.size = Vector3(2, 2, 2) * particles_collision->radius; + return aabb; + } + default: { + AABB aabb; + aabb.position = -particles_collision->extents; + aabb.size = particles_collision->extents * 2; + return aabb; + } + } +} + +Vector3 ParticlesStorage::particles_collision_get_extents(RID p_particles_collision) const { + const ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, Vector3()); + return particles_collision->extents; } bool ParticlesStorage::particles_collision_is_heightfield(RID p_particles_collision) const { - return false; + const ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, false); + return particles_collision->type == RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE; } -RID ParticlesStorage::particles_collision_get_heightfield_framebuffer(RID p_particles_collision) const { - return RID(); +Dependency *ParticlesStorage::particles_collision_get_dependency(RID p_particles_collision) const { + ParticlesCollision *pc = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_NULL_V(pc, nullptr); + + return &pc->dependency; } +/* Particles collision instance */ + RID ParticlesStorage::particles_collision_instance_create(RID p_collision) { - return RID(); + ParticlesCollisionInstance pci; + pci.collision = p_collision; + return particles_collision_instance_owner.make_rid(pci); } void ParticlesStorage::particles_collision_instance_free(RID p_rid) { + particles_collision_instance_owner.free(p_rid); } void ParticlesStorage::particles_collision_instance_set_transform(RID p_collision_instance, const Transform3D &p_transform) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.get_or_null(p_collision_instance); + ERR_FAIL_COND(!pci); + pci->transform = p_transform; } void ParticlesStorage::particles_collision_instance_set_active(RID p_collision_instance, bool p_active) { + ParticlesCollisionInstance *pci = particles_collision_instance_owner.get_or_null(p_collision_instance); + ERR_FAIL_COND(!pci); + pci->active = p_active; } #endif // GLES3_ENABLED diff --git a/drivers/gles3/storage/particles_storage.h b/drivers/gles3/storage/particles_storage.h index 84d1f94d8c..434718006e 100644 --- a/drivers/gles3/storage/particles_storage.h +++ b/drivers/gles3/storage/particles_storage.h @@ -33,25 +33,283 @@ #ifdef GLES3_ENABLED +#include "../shaders/particles_copy.glsl.gen.h" #include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" #include "core/templates/self_list.h" #include "servers/rendering/storage/particles_storage.h" +#include "servers/rendering/storage/utilities.h" + +#include "platform_config.h" +#ifndef OPENGL_INCLUDE_H +#include <GLES3/gl3.h> +#else +#include OPENGL_INCLUDE_H +#endif namespace GLES3 { +enum ParticlesUniformLocation { + PARTICLES_FRAME_UNIFORM_LOCATION, + PARTICLES_GLOBALS_UNIFORM_LOCATION, + PARTICLES_MATERIAL_UNIFORM_LOCATION, +}; + class ParticlesStorage : public RendererParticlesStorage { private: static ParticlesStorage *singleton; + /* PARTICLES */ + + struct ParticleInstanceData3D { + float xform[12]; + float color[2]; // Color and custom are packed together into one vec4; + float custom[2]; + }; + + struct ParticleInstanceData2D { + float xform[8]; + float color[2]; // Color and custom are packed together into one vec4; + float custom[2]; + }; + + struct ParticlesViewSort { + Vector3 z_dir; + bool operator()(const ParticleInstanceData3D &p_a, const ParticleInstanceData3D &p_b) const { + return z_dir.dot(Vector3(p_a.xform[3], p_a.xform[7], p_a.xform[11])) < z_dir.dot(Vector3(p_b.xform[3], p_b.xform[7], p_b.xform[11])); + } + }; + + struct ParticlesFrameParams { + enum { + MAX_ATTRACTORS = 32, + MAX_COLLIDERS = 32, + MAX_3D_TEXTURES = 0 // GLES3 renderer doesn't support using 3D textures for flow field or collisions. + }; + + enum AttractorType { + ATTRACTOR_TYPE_SPHERE, + ATTRACTOR_TYPE_BOX, + ATTRACTOR_TYPE_VECTOR_FIELD, + }; + + struct Attractor { + float transform[16]; + float extents[4]; // Extents or radius. w-channel is padding. + + uint32_t type; + float strength; + float attenuation; + float directionality; + }; + + enum CollisionType { + COLLISION_TYPE_SPHERE, + COLLISION_TYPE_BOX, + COLLISION_TYPE_SDF, + COLLISION_TYPE_HEIGHT_FIELD, + COLLISION_TYPE_2D_SDF, + + }; + + struct Collider { + float transform[16]; + float extents[4]; // Extents or radius. w-channel is padding. + + uint32_t type; + float scale; + float pad0; + float pad1; + }; + + uint32_t emitting; + uint32_t cycle; + float system_phase; + float prev_system_phase; + + float explosiveness; + float randomness; + float time; + float delta; + + float particle_size; + float pad0; + float pad1; + float pad2; + + uint32_t random_seed; + uint32_t attractor_count; + uint32_t collider_count; + uint32_t frame; + + float emission_transform[16]; + + Attractor attractors[MAX_ATTRACTORS]; + Collider colliders[MAX_COLLIDERS]; + }; + + struct Particles { + RS::ParticlesMode mode = RS::PARTICLES_MODE_3D; + bool inactive = true; + double inactive_time = 0.0; + bool emitting = false; + bool one_shot = false; + int amount = 0; + double lifetime = 1.0; + double pre_process_time = 0.0; + real_t explosiveness = 0.0; + real_t randomness = 0.0; + bool restart_request = false; + AABB custom_aabb = AABB(Vector3(-4, -4, -4), Vector3(8, 8, 8)); + bool use_local_coords = false; + bool has_collision_cache = false; + + bool has_sdf_collision = false; + Transform2D sdf_collision_transform; + Rect2 sdf_collision_to_screen; + GLuint sdf_collision_texture = 0; + + RID process_material; + uint32_t frame_counter = 0; + RS::ParticlesTransformAlign transform_align = RS::PARTICLES_TRANSFORM_ALIGN_DISABLED; + + RS::ParticlesDrawOrder draw_order = RS::PARTICLES_DRAW_ORDER_INDEX; + + Vector<RID> draw_passes; + + GLuint frame_params_ubo = 0; + + // We may process particles multiple times each frame (if they have a fixed FPS higher than the game FPS). + // Unfortunately, this means we can't just use a round-robin system of 3 buffers. + // To ensure the sort buffer is accurate, we copy the last frame instance buffer just before processing. + + // Transform Feedback buffer and VAO for rendering. + // Each frame we render to this one. + GLuint front_vertex_array = 0; // Binds process buffer. Used for processing. + GLuint front_process_buffer = 0; // Transform + color + custom data + userdata + velocity + flags. Only needed for processing. + GLuint front_instance_buffer = 0; // Transform + color + custom data. In packed format needed for rendering. + + // VAO for transform feedback, contains last frame's data. + // Read from this one for particles process and then copy to last frame buffer. + GLuint back_vertex_array = 0; // Binds process buffer. Used for processing. + GLuint back_process_buffer = 0; // Transform + color + custom data + userdata + velocity + flags. Only needed for processing. + GLuint back_instance_buffer = 0; // Transform + color + custom data. In packed format needed for rendering. + + uint32_t instance_buffer_size_cache = 0; + uint32_t instance_buffer_stride_cache = 0; + uint32_t num_attrib_arrays_cache = 0; + uint32_t process_buffer_stride_cache = 0; + + // Only ever copied to, holds last frame's instance data, then swaps with sort_buffer. + GLuint last_frame_buffer = 0; + bool last_frame_buffer_filled = false; + float last_frame_phase = 0.0; + + // The frame-before-last's instance buffer. + // Use this to copy data back for sorting or computing AABB. + GLuint sort_buffer = 0; + bool sort_buffer_filled = false; + float sort_buffer_phase = 0.0; + + uint32_t userdata_count = 0; + + bool dirty = false; + Particles *update_list = nullptr; + + double phase = 0.0; + double prev_phase = 0.0; + uint64_t prev_ticks = 0; + uint32_t random_seed = 0; + + uint32_t cycle_number = 0; + + double speed_scale = 1.0; + + int fixed_fps = 30; + bool interpolate = true; + bool fractional_delta = false; + double frame_remainder = 0; + real_t collision_base_size = 0.01; + + bool clear = true; + + Transform3D emission_transform; + + HashSet<RID> collisions; + + Dependency dependency; + + double trail_length = 1.0; + bool trails_enabled = false; + + Particles() { + } + }; + + void _particles_process(Particles *p_particles, double p_delta); + void _particles_free_data(Particles *particles); + void _particles_update_buffers(Particles *particles); + void _particles_allocate_history_buffers(Particles *particles); + void _particles_update_instance_buffer(Particles *particles, const Vector3 &p_axis, const Vector3 &p_up_axis); + + template <typename T> + void _particles_reverse_lifetime_sort(Particles *particles); + + struct ParticlesShader { + RID default_shader; + RID default_material; + RID default_shader_version; + + ParticlesCopyShaderGLES3 copy_shader; + RID copy_shader_version; + } particles_shader; + + Particles *particle_update_list = nullptr; + + mutable RID_Owner<Particles, true> particles_owner; + + /* Particles Collision */ + + struct ParticlesCollision { + RS::ParticlesCollisionType type = RS::PARTICLES_COLLISION_TYPE_SPHERE_ATTRACT; + uint32_t cull_mask = 0xFFFFFFFF; + float radius = 1.0; + Vector3 extents = Vector3(1, 1, 1); + float attractor_strength = 1.0; + float attractor_attenuation = 1.0; + float attractor_directionality = 0.0; + GLuint field_texture = 0; + GLuint heightfield_texture = 0; + GLuint heightfield_fb = 0; + Size2i heightfield_fb_size; + + RS::ParticlesCollisionHeightfieldResolution heightfield_resolution = RS::PARTICLES_COLLISION_HEIGHTFIELD_RESOLUTION_1024; + + Dependency dependency; + }; + + struct ParticlesCollisionInstance { + RID collision; + Transform3D transform; + bool active = false; + }; + + mutable RID_Owner<ParticlesCollision, true> particles_collision_owner; + + mutable RID_Owner<ParticlesCollisionInstance> particles_collision_instance_owner; + public: static ParticlesStorage *get_singleton(); ParticlesStorage(); virtual ~ParticlesStorage(); + bool free(RID p_rid); + /* PARTICLES */ + bool owns_particles(RID p_rid) { return particles_owner.owns(p_rid); } + virtual RID particles_allocate() override; virtual void particles_initialize(RID p_rid) override; virtual void particles_free(RID p_rid) override; @@ -102,12 +360,51 @@ public: virtual void particles_add_collision(RID p_particles, RID p_instance) override; virtual void particles_remove_collision(RID p_particles, RID p_instance) override; - virtual void particles_set_canvas_sdf_collision(RID p_particles, bool p_enable, const Transform2D &p_xform, const Rect2 &p_to_screen, RID p_texture) override; + void particles_set_canvas_sdf_collision(RID p_particles, bool p_enable, const Transform2D &p_xform, const Rect2 &p_to_screen, GLuint p_texture); virtual void update_particles() override; virtual bool particles_is_inactive(RID p_particles) const override; + _FORCE_INLINE_ RS::ParticlesMode particles_get_mode(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, RS::PARTICLES_MODE_2D); + return particles->mode; + } + + _FORCE_INLINE_ uint32_t particles_get_amount(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, 0); + + return particles->amount; + } + + _FORCE_INLINE_ GLuint particles_get_gl_buffer(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + + if ((particles->draw_order == RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH || particles->draw_order == RS::PARTICLES_DRAW_ORDER_REVERSE_LIFETIME) && particles->sort_buffer_filled) { + return particles->sort_buffer; + } + return particles->back_instance_buffer; + } + + _FORCE_INLINE_ bool particles_has_collision(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, 0); + + return particles->has_collision_cache; + } + + _FORCE_INLINE_ uint32_t particles_is_using_local_coords(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_COND_V(!particles, false); + + return particles->use_local_coords; + } + + Dependency *particles_get_dependency(RID p_particles) const; + /* PARTICLES COLLISION */ + bool owns_particles_collision(RID p_rid) { return particles_collision_owner.owns(p_rid); } virtual RID particles_collision_allocate() override; virtual void particles_collision_initialize(RID p_rid) override; @@ -124,8 +421,22 @@ public: virtual void particles_collision_height_field_update(RID p_particles_collision) override; virtual void particles_collision_set_height_field_resolution(RID p_particles_collision, RS::ParticlesCollisionHeightfieldResolution p_resolution) override; virtual AABB particles_collision_get_aabb(RID p_particles_collision) const override; + Vector3 particles_collision_get_extents(RID p_particles_collision) const; virtual bool particles_collision_is_heightfield(RID p_particles_collision) const override; - virtual RID particles_collision_get_heightfield_framebuffer(RID p_particles_collision) const override; + GLuint particles_collision_get_heightfield_framebuffer(RID p_particles_collision) const; + + _FORCE_INLINE_ Size2i particles_collision_get_heightfield_size(RID p_particles_collision) const { + ParticlesCollision *particles_collision = particles_collision_owner.get_or_null(p_particles_collision); + ERR_FAIL_COND_V(!particles_collision, Size2i()); + ERR_FAIL_COND_V(particles_collision->type != RS::PARTICLES_COLLISION_TYPE_HEIGHTFIELD_COLLIDE, Size2i()); + + return particles_collision->heightfield_fb_size; + } + + Dependency *particles_collision_get_dependency(RID p_particles) const; + + /* PARTICLES COLLISION INSTANCE*/ + bool owns_particles_collision_instance(RID p_rid) { return particles_collision_instance_owner.owns(p_rid); } virtual RID particles_collision_instance_create(RID p_collision) override; virtual void particles_collision_instance_free(RID p_rid) override; diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.cpp b/drivers/gles3/storage/render_scene_buffers_gles3.cpp index 9123984dc7..e0e78de728 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.cpp +++ b/drivers/gles3/storage/render_scene_buffers_gles3.cpp @@ -50,54 +50,16 @@ void RenderSceneBuffersGLES3::configure(RID p_render_target, const Size2i p_inte //msaa = p_msaa; //screen_space_aa = p_screen_space_aa; //use_debanding = p_use_debanding; - //view_count = p_view_count; + view_count = p_view_count; free_render_buffer_data(); GLES3::RenderTarget *rt = texture_storage->get_render_target(p_render_target); is_transparent = rt->is_transparent; - - // framebuffer - glGenFramebuffers(1, &framebuffer); - glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); - - glBindTexture(GL_TEXTURE_2D, rt->color); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->color, 0); - - glGenTextures(1, &depth_texture); - glBindTexture(GL_TEXTURE_2D, depth_texture); - - glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, rt->size.x, rt->size.y, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, nullptr); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_texture, 0); - - GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); - - glBindTexture(GL_TEXTURE_2D, 0); - glBindFramebuffer(GL_FRAMEBUFFER, texture_storage->system_fbo); - - if (status != GL_FRAMEBUFFER_COMPLETE) { - free_render_buffer_data(); - WARN_PRINT("Could not create 3D renderbuffer, status: " + texture_storage->get_framebuffer_error(status)); - return; - } } void RenderSceneBuffersGLES3::free_render_buffer_data() { - if (depth_texture) { - glDeleteTextures(1, &depth_texture); - depth_texture = 0; - } - if (framebuffer) { - glDeleteFramebuffers(1, &framebuffer); - framebuffer = 0; - } } #endif // GLES3_ENABLED diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h index ad0d2032b0..092c14e1b8 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.h +++ b/drivers/gles3/storage/render_scene_buffers_gles3.h @@ -56,14 +56,11 @@ public: RS::ViewportMSAA msaa = RS::VIEWPORT_MSAA_DISABLED; //RS::ViewportScreenSpaceAA screen_space_aa = RS::VIEWPORT_SCREEN_SPACE_AA_DISABLED; //bool use_debanding = false; - //uint32_t view_count = 1; + uint32_t view_count = 1; bool is_transparent = false; RID render_target; - GLuint internal_texture = 0; // Used for rendering when post effects are enabled - GLuint depth_texture = 0; // Main depth texture - GLuint framebuffer = 0; // Main framebuffer, contains internal_texture and depth_texture or render_target->color and depth_texture //built-in textures used for ping pong image processing and blurring struct Blur { diff --git a/drivers/gles3/storage/texture_storage.cpp b/drivers/gles3/storage/texture_storage.cpp index 442bd69b55..99908d197a 100644 --- a/drivers/gles3/storage/texture_storage.cpp +++ b/drivers/gles3/storage/texture_storage.cpp @@ -34,6 +34,10 @@ #include "config.h" #include "drivers/gles3/effects/copy_effects.h" +#ifdef ANDROID_ENABLED +#define glFramebufferTextureMultiviewOVR GLES3::Config::get_singleton()->eglFramebufferTextureMultiviewOVR +#endif + using namespace GLES3; TextureStorage *TextureStorage::singleton = nullptr; @@ -59,9 +63,7 @@ TextureStorage::TextureStorage() { { //create default textures { // White Textures - Ref<Image> image; - image.instantiate(); - image->create(4, 4, true, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, true, Image::FORMAT_RGBA8); image->fill(Color(1, 1, 1, 1)); image->generate_mipmaps(); @@ -90,9 +92,7 @@ TextureStorage::TextureStorage() { } { // black - Ref<Image> image; - image.instantiate(); - image->create(4, 4, true, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, true, Image::FORMAT_RGBA8); image->fill(Color(0, 0, 0, 1)); image->generate_mipmaps(); @@ -116,9 +116,7 @@ TextureStorage::TextureStorage() { } { // transparent black - Ref<Image> image; - image.instantiate(); - image->create(4, 4, true, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, true, Image::FORMAT_RGBA8); image->fill(Color(0, 0, 0, 0)); image->generate_mipmaps(); @@ -127,9 +125,7 @@ TextureStorage::TextureStorage() { } { - Ref<Image> image; - image.instantiate(); - image->create(4, 4, true, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, true, Image::FORMAT_RGBA8); image->fill(Color(0.5, 0.5, 1, 1)); image->generate_mipmaps(); @@ -138,9 +134,7 @@ TextureStorage::TextureStorage() { } { - Ref<Image> image; - image.instantiate(); - image->create(4, 4, true, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, true, Image::FORMAT_RGBA8); image->fill(Color(1.0, 0.5, 1, 1)); image->generate_mipmaps(); @@ -197,6 +191,27 @@ TextureStorage::TextureStorage() { glBindTexture(GL_TEXTURE_2D, 0); + { // Atlas Texture initialize. + uint8_t pixel_data[4 * 4 * 4]; + for (int i = 0; i < 16; i++) { + pixel_data[i * 4 + 0] = 0; + pixel_data[i * 4 + 1] = 0; + pixel_data[i * 4 + 2] = 0; + pixel_data[i * 4 + 3] = 255; + } + + glGenTextures(1, &texture_atlas.texture); + glBindTexture(GL_TEXTURE_2D, texture_atlas.texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 4, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, pixel_data); + } + + glBindTexture(GL_TEXTURE_2D, 0); + + { + sdf_shader.shader.initialize(); + sdf_shader.shader_version = sdf_shader.shader.version_create(); + } + #ifdef GLES_OVER_GL glEnable(GL_PROGRAM_POINT_SIZE); #endif @@ -207,6 +222,12 @@ TextureStorage::~TextureStorage() { for (int i = 0; i < DEFAULT_GL_TEXTURE_MAX; i++) { texture_free(default_gl_textures[i]); } + + glDeleteTextures(1, &texture_atlas.texture); + texture_atlas.texture = 0; + glDeleteFramebuffers(1, &texture_atlas.framebuffer); + texture_atlas.framebuffer = 0; + sdf_shader.shader.version_free(sdf_shader.shader_version); } //TODO, move back to storage @@ -230,6 +251,8 @@ void TextureStorage::canvas_texture_free(RID p_rid) { void TextureStorage::canvas_texture_set_channel(RID p_canvas_texture, RS::CanvasTextureChannel p_channel, RID p_texture) { CanvasTexture *ct = canvas_texture_owner.get_or_null(p_canvas_texture); + ERR_FAIL_NULL(ct); + switch (p_channel) { case RS::CANVAS_TEXTURE_CHANNEL_DIFFUSE: { ct->diffuse = p_texture; @@ -245,6 +268,8 @@ void TextureStorage::canvas_texture_set_channel(RID p_canvas_texture, RS::Canvas void TextureStorage::canvas_texture_set_shading_parameters(RID p_canvas_texture, const Color &p_specular_color, float p_shininess) { CanvasTexture *ct = canvas_texture_owner.get_or_null(p_canvas_texture); + ERR_FAIL_NULL(ct); + ct->specular_color.r = p_specular_color.r; ct->specular_color.g = p_specular_color.g; ct->specular_color.b = p_specular_color.b; @@ -253,61 +278,16 @@ void TextureStorage::canvas_texture_set_shading_parameters(RID p_canvas_texture, void TextureStorage::canvas_texture_set_texture_filter(RID p_canvas_texture, RS::CanvasItemTextureFilter p_filter) { CanvasTexture *ct = canvas_texture_owner.get_or_null(p_canvas_texture); + ERR_FAIL_NULL(ct); + ct->texture_filter = p_filter; } void TextureStorage::canvas_texture_set_texture_repeat(RID p_canvas_texture, RS::CanvasItemTextureRepeat p_repeat) { CanvasTexture *ct = canvas_texture_owner.get_or_null(p_canvas_texture); - ct->texture_repeat = p_repeat; -} - -/* CANVAS SHADOW */ - -RID TextureStorage::canvas_light_shadow_buffer_create(int p_width) { - Config *config = Config::get_singleton(); - CanvasLightShadow *cls = memnew(CanvasLightShadow); - - if (p_width > config->max_texture_size) { - p_width = config->max_texture_size; - } - - cls->size = p_width; - cls->height = 16; + ERR_FAIL_NULL(ct); - glActiveTexture(GL_TEXTURE0); - - glGenFramebuffers(1, &cls->fbo); - glBindFramebuffer(GL_FRAMEBUFFER, cls->fbo); - - glGenRenderbuffers(1, &cls->depth); - glBindRenderbuffer(GL_RENDERBUFFER, cls->depth); - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16, cls->size, cls->height); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, cls->depth); - - glGenTextures(1, &cls->distance); - glBindTexture(GL_TEXTURE_2D, cls->distance); - if (config->use_rgba_2d_shadows) { - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, cls->size, cls->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); - } else { - glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, cls->size, cls->height, 0, GL_RED, GL_FLOAT, nullptr); - } - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cls->distance, 0); - - GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); - //printf("errnum: %x\n",status); - glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); - - if (status != GL_FRAMEBUFFER_COMPLETE) { - memdelete(cls); - ERR_FAIL_COND_V(status != GL_FRAMEBUFFER_COMPLETE, RID()); - } - - return canvas_light_shadow_owner.make_rid(cls); + ct->texture_repeat = p_repeat; } /* Texture API */ @@ -642,7 +622,9 @@ void TextureStorage::texture_free(RID p_texture) { } if (t->tex_id != 0) { - glDeleteTextures(1, &t->tex_id); + if (!t->is_external) { + glDeleteTextures(1, &t->tex_id); + } t->tex_id = 0; } @@ -653,7 +635,7 @@ void TextureStorage::texture_free(RID p_texture) { } } - //decal_atlas_remove_texture(p_texture); + texture_atlas_remove_texture(p_texture); for (int i = 0; i < t->proxies.size(); i++) { Texture *p = texture_owner.get_or_null(t->proxies[i]); @@ -673,7 +655,7 @@ void TextureStorage::texture_2d_initialize(RID p_texture, const Ref<Image> &p_im texture.height = p_image->get_height(); texture.alloc_width = texture.width; texture.alloc_height = texture.height; - texture.mipmaps = p_image->get_mipmap_count(); + texture.mipmaps = p_image->get_mipmap_count() + 1; texture.format = p_image->get_format(); texture.type = Texture::TYPE_2D; texture.target = GL_TEXTURE_2D; @@ -708,9 +690,37 @@ void TextureStorage::texture_proxy_initialize(RID p_texture, RID p_base) { texture_owner.initialize_rid(p_texture, proxy_tex); } +RID TextureStorage::texture_create_external(Texture::Type p_type, Image::Format p_format, unsigned int p_image, int p_width, int p_height, int p_depth, int p_layers, RS::TextureLayeredType p_layered_type) { + Texture texture; + texture.active = true; + texture.is_external = true; + texture.type = p_type; + + switch (p_type) { + case Texture::TYPE_2D: { + texture.target = GL_TEXTURE_2D; + } break; + case Texture::TYPE_3D: { + texture.target = GL_TEXTURE_3D; + } break; + case Texture::TYPE_LAYERED: { + texture.target = GL_TEXTURE_2D_ARRAY; + } break; + } + + texture.real_format = texture.format = p_format; + texture.tex_id = p_image; + texture.alloc_width = texture.width = p_width; + texture.alloc_height = texture.height = p_height; + texture.depth = p_depth; + texture.layers = p_layers; + texture.layered_type = p_layered_type; + + return texture_owner.make_rid(texture); +} + void TextureStorage::texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer) { - // only 1 layer so far - texture_set_data(p_texture, p_image); + texture_set_data(p_texture, p_image, p_layer); #ifdef TOOLS_ENABLED Texture *tex = texture_owner.get_or_null(p_texture); @@ -719,14 +729,32 @@ void TextureStorage::texture_2d_update(RID p_texture, const Ref<Image> &p_image, } void TextureStorage::texture_proxy_update(RID p_texture, RID p_proxy_to) { + Texture *tex = texture_owner.get_or_null(p_texture); + ERR_FAIL_COND(!tex); + ERR_FAIL_COND(!tex->is_proxy); + Texture *proxy_to = texture_owner.get_or_null(p_proxy_to); + ERR_FAIL_COND(!proxy_to); + ERR_FAIL_COND(proxy_to->is_proxy); + + if (tex->proxy_to.is_valid()) { + Texture *prev_tex = texture_owner.get_or_null(tex->proxy_to); + ERR_FAIL_COND(!prev_tex); + prev_tex->proxies.erase(p_texture); + } + + *tex = *proxy_to; + + tex->proxy_to = p_proxy_to; + tex->is_render_target = false; + tex->is_proxy = true; + tex->proxies.clear(); + proxy_to->proxies.push_back(p_texture); } void TextureStorage::texture_2d_placeholder_initialize(RID p_texture) { //this could be better optimized to reuse an existing image , done this way //for now to get it working - Ref<Image> image; - image.instantiate(); - image->create(4, 4, false, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); image->fill(Color(1, 0, 1, 1)); texture_2d_initialize(p_texture, image); @@ -735,9 +763,7 @@ void TextureStorage::texture_2d_placeholder_initialize(RID p_texture) { void TextureStorage::texture_2d_layered_placeholder_initialize(RID p_texture, RenderingServer::TextureLayeredType p_layered_type) { //this could be better optimized to reuse an existing image , done this way //for now to get it working - Ref<Image> image; - image.instantiate(); - image->create(4, 4, false, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); image->fill(Color(1, 0, 1, 1)); Vector<Ref<Image>> images; @@ -756,9 +782,7 @@ void TextureStorage::texture_2d_layered_placeholder_initialize(RID p_texture, Re void TextureStorage::texture_3d_placeholder_initialize(RID p_texture) { //this could be better optimized to reuse an existing image , done this way //for now to get it working - Ref<Image> image; - image.instantiate(); - image->create(4, 4, false, Image::FORMAT_RGBA8); + Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); image->fill(Color(1, 0, 1, 1)); Vector<Ref<Image>> images; @@ -782,6 +806,7 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { #ifdef GLES_OVER_GL // OpenGL 3.3 supports glGetTexImage which is faster and simpler than glReadPixels. + // It also allows for reading compressed textures, mipmaps, and more formats. Vector<uint8_t> data; int data_size = Image::get_image_data_size(texture->alloc_width, texture->alloc_height, texture->real_format, texture->mipmaps > 1); @@ -812,16 +837,71 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { data.resize(data_size); ERR_FAIL_COND_V(data.size() == 0, Ref<Image>()); - Ref<Image> image; - image.instantiate(); - image->create(texture->width, texture->height, texture->mipmaps > 1, texture->real_format, data); + Ref<Image> image = Image::create_from_data(texture->width, texture->height, texture->mipmaps > 1, texture->real_format, data); ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); if (texture->format != texture->real_format) { image->convert(texture->format); } #else - // Support for Web and Mobile will come later. - Ref<Image> image; + + Vector<uint8_t> data; + + // On web and mobile we always read an RGBA8 image with no mipmaps. + int data_size = Image::get_image_data_size(texture->alloc_width, texture->alloc_height, Image::FORMAT_RGBA8, false); + + data.resize(data_size * 2); //add some memory at the end, just in case for buggy drivers + uint8_t *w = data.ptrw(); + + GLuint temp_framebuffer; + glGenFramebuffers(1, &temp_framebuffer); + + GLuint temp_color_texture; + glGenTextures(1, &temp_color_texture); + + glBindFramebuffer(GL_FRAMEBUFFER, temp_framebuffer); + + glBindTexture(GL_TEXTURE_2D, temp_color_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture->alloc_width, texture->alloc_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, temp_color_texture, 0); + + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_BLEND); + glDepthFunc(GL_LEQUAL); + glColorMask(1, 1, 1, 1); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture->tex_id); + + glViewport(0, 0, texture->alloc_width, texture->alloc_height); + glClearColor(0.0, 0.0, 0.0, 0.0); + glClear(GL_COLOR_BUFFER_BIT); + + CopyEffects::get_singleton()->copy_to_rect(Rect2i(0, 0, 1.0, 1.0)); + + glReadPixels(0, 0, texture->alloc_width, texture->alloc_height, GL_RGBA, GL_UNSIGNED_BYTE, &w[0]); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteTextures(1, &temp_color_texture); + glDeleteFramebuffers(1, &temp_framebuffer); + + data.resize(data_size); + + ERR_FAIL_COND_V(data.size() == 0, Ref<Image>()); + Ref<Image> image = Image::create_from_data(texture->width, texture->height, false, Image::FORMAT_RGBA8, data); + ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); + + if (texture->format != Image::FORMAT_RGBA8) { + image->convert(texture->format); + } + + if (texture->mipmaps > 1) { + image->generate_mipmaps(); + } + #endif #ifdef TOOLS_ENABLED @@ -875,7 +955,7 @@ void TextureStorage::texture_replace(RID p_texture, RID p_by_texture) { //delete last, so proxies can be updated texture_owner.free(p_by_texture); - //decal_atlas_mark_dirty_on_texture(p_texture); + texture_atlas_mark_dirty_on_texture(p_texture); } void TextureStorage::texture_set_size_override(RID p_texture, int p_width, int p_height) { @@ -970,6 +1050,10 @@ Size2 TextureStorage::texture_size_with_proxy(RID p_texture) { } } +RID TextureStorage::texture_get_rd_texture_rid(RID p_texture, bool p_srgb) const { + return RID(); +} + void TextureStorage::texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer) { Texture *texture = texture_owner.get_or_null(p_texture); @@ -1009,7 +1093,7 @@ void TextureStorage::texture_set_data(RID p_texture, const Ref<Image> &p_image, img->resize_to_po2(false); } - GLenum blit_target = (texture->target == GL_TEXTURE_CUBE_MAP) ? _cube_side_enum[p_layer] : GL_TEXTURE_2D; + GLenum blit_target = (texture->target == GL_TEXTURE_CUBE_MAP) ? _cube_side_enum[p_layer] : texture->target; Vector<uint8_t> read = img->get_data(); @@ -1066,7 +1150,11 @@ void TextureStorage::texture_set_data(RID p_texture, const Ref<Image> &p_image, glCompressedTexImage2D(blit_target, i, internal_format, bw, bh, 0, size, &read[ofs]); } else { glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glTexImage2D(blit_target, i, internal_format, w, h, 0, format, type, &read[ofs]); + if (texture->target == GL_TEXTURE_2D_ARRAY) { + glTexSubImage3D(GL_TEXTURE_2D_ARRAY, i, 0, 0, p_layer, w, h, 0, format, type, &read[ofs]); + } else { + glTexImage2D(blit_target, i, internal_format, w, h, 0, format, type, &read[ofs]); + } } tsize += size; @@ -1143,6 +1231,217 @@ RID TextureStorage::texture_create_radiance_cubemap(RID p_source, int p_resoluti return RID(); } +/* TEXTURE ATLAS API */ + +void TextureStorage::texture_add_to_texture_atlas(RID p_texture) { + if (!texture_atlas.textures.has(p_texture)) { + TextureAtlas::Texture t; + t.users = 1; + texture_atlas.textures[p_texture] = t; + texture_atlas.dirty = true; + } else { + TextureAtlas::Texture *t = texture_atlas.textures.getptr(p_texture); + t->users++; + } +} + +void TextureStorage::texture_remove_from_texture_atlas(RID p_texture) { + TextureAtlas::Texture *t = texture_atlas.textures.getptr(p_texture); + ERR_FAIL_COND(!t); + t->users--; + if (t->users == 0) { + texture_atlas.textures.erase(p_texture); + // Do not mark it dirty, there is no need to since it remains working. + } +} + +void TextureStorage::texture_atlas_mark_dirty_on_texture(RID p_texture) { + if (texture_atlas.textures.has(p_texture)) { + texture_atlas.dirty = true; // Mark it dirty since it was most likely modified. + } +} + +void TextureStorage::texture_atlas_remove_texture(RID p_texture) { + if (texture_atlas.textures.has(p_texture)) { + texture_atlas.textures.erase(p_texture); + // There is not much a point of making it dirty, texture can be removed next time the atlas is updated. + } +} + +GLuint TextureStorage::texture_atlas_get_texture() const { + return texture_atlas.texture; +} + +void TextureStorage::update_texture_atlas() { + CopyEffects *copy_effects = CopyEffects::get_singleton(); + ERR_FAIL_NULL(copy_effects); + + if (!texture_atlas.dirty) { + return; //nothing to do + } + + texture_atlas.dirty = false; + + if (texture_atlas.texture != 0) { + glDeleteTextures(1, &texture_atlas.texture); + texture_atlas.texture = 0; + glDeleteFramebuffers(1, &texture_atlas.framebuffer); + texture_atlas.framebuffer = 0; + } + + const int border = 2; + + if (texture_atlas.textures.size()) { + //generate atlas + Vector<TextureAtlas::SortItem> itemsv; + itemsv.resize(texture_atlas.textures.size()); + int base_size = 8; + + int idx = 0; + + for (const KeyValue<RID, TextureAtlas::Texture> &E : texture_atlas.textures) { + TextureAtlas::SortItem &si = itemsv.write[idx]; + + Texture *src_tex = get_texture(E.key); + + si.size.width = (src_tex->width / border) + 1; + si.size.height = (src_tex->height / border) + 1; + si.pixel_size = Size2i(src_tex->width, src_tex->height); + + if (base_size < si.size.width) { + base_size = nearest_power_of_2_templated(si.size.width); + } + + si.texture = E.key; + idx++; + } + + //sort items by size + itemsv.sort(); + + //attempt to create atlas + int item_count = itemsv.size(); + TextureAtlas::SortItem *items = itemsv.ptrw(); + + int atlas_height = 0; + + while (true) { + Vector<int> v_offsetsv; + v_offsetsv.resize(base_size); + + int *v_offsets = v_offsetsv.ptrw(); + memset(v_offsets, 0, sizeof(int) * base_size); + + int max_height = 0; + + for (int i = 0; i < item_count; i++) { + //best fit + TextureAtlas::SortItem &si = items[i]; + int best_idx = -1; + int best_height = 0x7FFFFFFF; + for (int j = 0; j <= base_size - si.size.width; j++) { + int height = 0; + for (int k = 0; k < si.size.width; k++) { + int h = v_offsets[k + j]; + if (h > height) { + height = h; + if (height > best_height) { + break; //already bad + } + } + } + + if (height < best_height) { + best_height = height; + best_idx = j; + } + } + + //update + for (int k = 0; k < si.size.width; k++) { + v_offsets[k + best_idx] = best_height + si.size.height; + } + + si.pos.x = best_idx; + si.pos.y = best_height; + + if (si.pos.y + si.size.height > max_height) { + max_height = si.pos.y + si.size.height; + } + } + + if (max_height <= base_size * 2) { + atlas_height = max_height; + break; //good ratio, break; + } + + base_size *= 2; + } + + texture_atlas.size.width = base_size * border; + texture_atlas.size.height = nearest_power_of_2_templated(atlas_height * border); + + for (int i = 0; i < item_count; i++) { + TextureAtlas::Texture *t = texture_atlas.textures.getptr(items[i].texture); + t->uv_rect.position = items[i].pos * border + Vector2i(border / 2, border / 2); + t->uv_rect.size = items[i].pixel_size; + + t->uv_rect.position /= Size2(texture_atlas.size); + t->uv_rect.size /= Size2(texture_atlas.size); + } + } else { + texture_atlas.size.width = 4; + texture_atlas.size.height = 4; + } + + { // Atlas Texture initialize. + // TODO validate texture atlas size with maximum texture size + glGenTextures(1, &texture_atlas.texture); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture_atlas.texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, texture_atlas.size.width, texture_atlas.size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + + glGenFramebuffers(1, &texture_atlas.framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, texture_atlas.framebuffer); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture_atlas.texture, 0); + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + + if (status != GL_FRAMEBUFFER_COMPLETE) { + glDeleteFramebuffers(1, &texture_atlas.framebuffer); + texture_atlas.framebuffer = 0; + glDeleteTextures(1, &texture_atlas.texture); + texture_atlas.texture = 0; + WARN_PRINT("Could not create texture atlas, status: " + get_framebuffer_error(status)); + return; + } + glViewport(0, 0, texture_atlas.size.width, texture_atlas.size.height); + glClearColor(0.0, 0.0, 0.0, 0.0); + glClear(GL_COLOR_BUFFER_BIT); + glBindTexture(GL_TEXTURE_2D, 0); + } + + glDisable(GL_BLEND); + + if (texture_atlas.textures.size()) { + for (const KeyValue<RID, TextureAtlas::Texture> &E : texture_atlas.textures) { + TextureAtlas::Texture *t = texture_atlas.textures.getptr(E.key); + Texture *src_tex = get_texture(E.key); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, src_tex->tex_id); + copy_effects->copy_to_rect(t->uv_rect); + } + } + glBindFramebuffer(GL_FRAMEBUFFER, 0); +} + /* DECAL API */ RID TextureStorage::decal_allocate() { @@ -1211,6 +1510,8 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { return; } + Config *config = Config::get_singleton(); + rt->color_internal_format = rt->is_transparent ? GL_RGBA8 : GL_RGB10_A2; rt->color_format = GL_RGBA; rt->color_type = rt->is_transparent ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_2_10_10_10_REV; @@ -1221,31 +1522,74 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { glDepthMask(GL_FALSE); { - /* Front FBO */ + Texture *texture; + bool use_multiview = rt->view_count > 1 && config->multiview_supported; + GLenum texture_target = use_multiview ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; - Texture *texture = get_texture(rt->texture); - ERR_FAIL_COND(!texture); + /* Front FBO */ - // framebuffer glGenFramebuffers(1, &rt->fbo); glBindFramebuffer(GL_FRAMEBUFFER, rt->fbo); // color - glGenTextures(1, &rt->color); - glBindTexture(GL_TEXTURE_2D, rt->color); + if (rt->overridden.color.is_valid()) { + texture = get_texture(rt->overridden.color); + ERR_FAIL_COND(!texture); - glTexImage2D(GL_TEXTURE_2D, 0, rt->color_internal_format, rt->size.x, rt->size.y, 0, rt->color_format, rt->color_type, nullptr); + rt->color = texture->tex_id; + rt->size = Size2i(texture->width, texture->height); + } else { + texture = get_texture(rt->texture); + ERR_FAIL_COND(!texture); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glGenTextures(1, &rt->color); + glBindTexture(texture_target, rt->color); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + if (use_multiview) { + glTexImage3D(texture_target, 0, rt->color_internal_format, rt->size.x, rt->size.y, rt->view_count, 0, rt->color_format, rt->color_type, nullptr); + } else { + glTexImage2D(texture_target, 0, rt->color_internal_format, rt->size.x, rt->size.y, 0, rt->color_format, rt->color_type, nullptr); + } + + glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + if (use_multiview) { + glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, rt->color, 0, 0, rt->view_count); + } else { + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->color, 0); + } - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->color, 0); + // depth + if (rt->overridden.depth.is_valid()) { + texture = get_texture(rt->overridden.depth); + ERR_FAIL_COND(!texture); - GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + rt->depth = texture->tex_id; + } else { + glGenTextures(1, &rt->depth); + glBindTexture(texture_target, rt->depth); + + if (use_multiview) { + glTexImage3D(texture_target, 0, GL_DEPTH_COMPONENT24, rt->size.x, rt->size.y, rt->view_count, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, nullptr); + } else { + glTexImage2D(texture_target, 0, GL_DEPTH_COMPONENT24, rt->size.x, rt->size.y, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, nullptr); + } + + glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + if (use_multiview) { + glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, rt->depth, 0, 0, rt->view_count); + } else { + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, rt->depth, 0); + } + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { glDeleteFramebuffers(1, &rt->fbo); glDeleteTextures(1, &rt->color); @@ -1253,25 +1597,38 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { rt->size.x = 0; rt->size.y = 0; rt->color = 0; - texture->tex_id = 0; - texture->active = false; + rt->depth = 0; + if (rt->overridden.color.is_null()) { + texture->tex_id = 0; + texture->active = false; + } WARN_PRINT("Could not create render target, status: " + get_framebuffer_error(status)); return; } - texture->format = rt->image_format; - texture->real_format = rt->image_format; - texture->type = Texture::TYPE_2D; - texture->target = GL_TEXTURE_2D; - texture->gl_format_cache = rt->color_format; - texture->gl_type_cache = GL_UNSIGNED_BYTE; - texture->gl_internal_format_cache = rt->color_internal_format; - texture->tex_id = rt->color; - texture->width = rt->size.x; - texture->alloc_width = rt->size.x; - texture->height = rt->size.y; - texture->alloc_height = rt->size.y; - texture->active = true; + if (rt->overridden.color.is_valid()) { + texture->is_render_target = true; + } else { + texture->format = rt->image_format; + texture->real_format = rt->image_format; + texture->target = texture_target; + if (rt->view_count > 1 && config->multiview_supported) { + texture->type = Texture::TYPE_LAYERED; + texture->layers = rt->view_count; + } else { + texture->type = Texture::TYPE_2D; + texture->layers = 1; + } + texture->gl_format_cache = rt->color_format; + texture->gl_type_cache = GL_UNSIGNED_BYTE; + texture->gl_internal_format_cache = rt->color_internal_format; + texture->tex_id = rt->color; + texture->width = rt->size.x; + texture->alloc_width = rt->size.x; + texture->height = rt->size.y; + texture->alloc_height = rt->size.y; + texture->active = true; + } } glClearColor(0, 0, 0, 0); @@ -1337,19 +1694,51 @@ void TextureStorage::_clear_render_target(RenderTarget *rt) { return; } + // Dispose of the cached fbo's and the allocated textures + for (KeyValue<uint32_t, RenderTarget::RTOverridden::FBOCacheEntry> &E : rt->overridden.fbo_cache) { + glDeleteTextures(E.value.allocated_textures.size(), E.value.allocated_textures.ptr()); + // Don't delete the current FBO, we'll do that a couple lines down. + if (E.value.fbo != rt->fbo) { + glDeleteFramebuffers(1, &E.value.fbo); + } + } + rt->overridden.fbo_cache.clear(); + if (rt->fbo) { glDeleteFramebuffers(1, &rt->fbo); - glDeleteTextures(1, &rt->color); rt->fbo = 0; - rt->color = 0; } - Texture *tex = get_texture(rt->texture); - tex->alloc_height = 0; - tex->alloc_width = 0; - tex->width = 0; - tex->height = 0; - tex->active = false; + if (rt->overridden.color.is_null()) { + if (rt->texture.is_valid()) { + Texture *tex = get_texture(rt->texture); + tex->alloc_height = 0; + tex->alloc_width = 0; + tex->width = 0; + tex->height = 0; + tex->active = false; + } + } else { + Texture *tex = get_texture(rt->overridden.color); + tex->is_render_target = false; + } + + if (rt->overridden.color.is_valid()) { + rt->overridden.color = RID(); + } else if (rt->color) { + glDeleteTextures(1, &rt->color); + } + rt->color = 0; + + if (rt->overridden.depth.is_valid()) { + rt->overridden.depth = RID(); + } else if (rt->depth) { + glDeleteTextures(1, &rt->depth); + } + rt->depth = 0; + + rt->overridden.velocity = RID(); + rt->overridden.is_overridden = false; if (rt->backbuffer_fbo != 0) { glDeleteFramebuffers(1, &rt->backbuffer_fbo); @@ -1357,6 +1746,7 @@ void TextureStorage::_clear_render_target(RenderTarget *rt) { rt->backbuffer = 0; rt->backbuffer_fbo = 0; } + _render_target_clear_sdf(rt); } RID TextureStorage::render_target_create() { @@ -1381,7 +1771,9 @@ void TextureStorage::render_target_free(RID p_rid) { Texture *t = get_texture(rt->texture); if (t) { t->is_render_target = false; - texture_free(rt->texture); + if (rt->overridden.color.is_null()) { + texture_free(rt->texture); + } //memdelete(t); } render_target_owner.free(p_rid); @@ -1405,13 +1797,17 @@ void TextureStorage::render_target_set_size(RID p_render_target, int p_width, in RenderTarget *rt = render_target_owner.get_or_null(p_render_target); ERR_FAIL_COND(!rt); - if (p_width == rt->size.x && p_height == rt->size.y) { + if (p_width == rt->size.x && p_height == rt->size.y && p_view_count == rt->view_count) { + return; + } + if (rt->overridden.color.is_valid()) { return; } _clear_render_target(rt); rt->size = Size2i(p_width, p_height); + rt->view_count = p_view_count; _update_render_target(rt); } @@ -1424,10 +1820,91 @@ Size2i TextureStorage::render_target_get_size(RID p_render_target) const { return rt->size; } +void TextureStorage::render_target_set_override(RID p_render_target, RID p_color_texture, RID p_depth_texture, RID p_velocity_texture) { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND(!rt); + ERR_FAIL_COND(rt->direct_to_screen); + + rt->overridden.velocity = p_velocity_texture; + + if (rt->overridden.color == p_color_texture && rt->overridden.depth == p_depth_texture) { + return; + } + + if (p_color_texture.is_null() && p_depth_texture.is_null()) { + _clear_render_target(rt); + _update_render_target(rt); + return; + } + + if (!rt->overridden.is_overridden) { + _clear_render_target(rt); + } + + rt->overridden.color = p_color_texture; + rt->overridden.depth = p_depth_texture; + rt->overridden.is_overridden = true; + + uint32_t hash_key = hash_murmur3_one_64(p_color_texture.get_id()); + hash_key = hash_murmur3_one_64(p_depth_texture.get_id(), hash_key); + hash_key = hash_fmix32(hash_key); + + RBMap<uint32_t, RenderTarget::RTOverridden::FBOCacheEntry>::Element *cache; + if ((cache = rt->overridden.fbo_cache.find(hash_key)) != nullptr) { + rt->fbo = cache->get().fbo; + rt->color = cache->get().color; + rt->depth = cache->get().depth; + rt->size = cache->get().size; + rt->texture = p_color_texture; + return; + } + + _update_render_target(rt); + + RenderTarget::RTOverridden::FBOCacheEntry new_entry; + new_entry.fbo = rt->fbo; + new_entry.color = rt->color; + new_entry.depth = rt->depth; + new_entry.size = rt->size; + // Keep track of any textures we had to allocate because they weren't overridden. + if (p_color_texture.is_null()) { + new_entry.allocated_textures.push_back(rt->color); + } + if (p_depth_texture.is_null()) { + new_entry.allocated_textures.push_back(rt->depth); + } + rt->overridden.fbo_cache.insert(hash_key, new_entry); +} + +RID TextureStorage::render_target_get_override_color(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + return rt->overridden.color; +} + +RID TextureStorage::render_target_get_override_depth(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + return rt->overridden.depth; +} + +RID TextureStorage::render_target_get_override_velocity(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND_V(!rt, RID()); + + return rt->overridden.velocity; +} + RID TextureStorage::render_target_get_texture(RID p_render_target) { RenderTarget *rt = render_target_owner.get_or_null(p_render_target); ERR_FAIL_COND_V(!rt, RID()); + if (rt->overridden.color.is_valid()) { + return rt->overridden.color; + } + return rt->texture; } @@ -1437,8 +1914,10 @@ void TextureStorage::render_target_set_transparent(RID p_render_target, bool p_t rt->is_transparent = p_transparent; - _clear_render_target(rt); - _update_render_target(rt); + if (rt->overridden.color.is_null()) { + _clear_render_target(rt); + _update_render_target(rt); + } } bool TextureStorage::render_target_get_transparent(RID p_render_target) const { @@ -1459,6 +1938,11 @@ void TextureStorage::render_target_set_direct_to_screen(RID p_render_target, boo // those functions change how they operate depending on the value of DIRECT_TO_SCREEN _clear_render_target(rt); rt->direct_to_screen = p_direct_to_screen; + if (rt->direct_to_screen) { + rt->overridden.color = RID(); + rt->overridden.depth = RID(); + rt->overridden.velocity = RID(); + } _update_render_target(rt); } @@ -1491,6 +1975,7 @@ void TextureStorage::render_target_set_msaa(RID p_render_target, RS::ViewportMSA } WARN_PRINT("2D MSAA is not yet supported for GLES3."); + _clear_render_target(rt); rt->msaa = p_msaa; _update_render_target(rt); @@ -1541,13 +2026,283 @@ void TextureStorage::render_target_do_clear_request(RID p_render_target) { } void TextureStorage::render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale) { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND(!rt); + if (rt->sdf_oversize == p_size && rt->sdf_scale == p_scale) { + return; + } + + rt->sdf_oversize = p_size; + rt->sdf_scale = p_scale; + + _render_target_clear_sdf(rt); +} + +Rect2i TextureStorage::_render_target_get_sdf_rect(const RenderTarget *rt) const { + Size2i margin; + int scale; + switch (rt->sdf_oversize) { + case RS::VIEWPORT_SDF_OVERSIZE_100_PERCENT: { + scale = 100; + } break; + case RS::VIEWPORT_SDF_OVERSIZE_120_PERCENT: { + scale = 120; + } break; + case RS::VIEWPORT_SDF_OVERSIZE_150_PERCENT: { + scale = 150; + } break; + case RS::VIEWPORT_SDF_OVERSIZE_200_PERCENT: { + scale = 200; + } break; + default: { + } + } + + margin = (rt->size * scale / 100) - rt->size; + + Rect2i r(Vector2i(), rt->size); + r.position -= margin; + r.size += margin * 2; + + return r; } Rect2i TextureStorage::render_target_get_sdf_rect(RID p_render_target) const { - return Rect2i(); + const RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND_V(!rt, Rect2i()); + + return _render_target_get_sdf_rect(rt); } void TextureStorage::render_target_mark_sdf_enabled(RID p_render_target, bool p_enabled) { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND(!rt); + + rt->sdf_enabled = p_enabled; +} + +bool TextureStorage::render_target_is_sdf_enabled(RID p_render_target) const { + const RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND_V(!rt, false); + + return rt->sdf_enabled; +} + +GLuint TextureStorage::render_target_get_sdf_texture(RID p_render_target) { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND_V(!rt, 0); + if (rt->sdf_texture_read == 0) { + Texture *texture = texture_owner.get_or_null(default_gl_textures[DEFAULT_GL_TEXTURE_BLACK]); + return texture->tex_id; + } + + return rt->sdf_texture_read; +} + +void TextureStorage::_render_target_allocate_sdf(RenderTarget *rt) { + ERR_FAIL_COND(rt->sdf_texture_write_fb != 0); + + Size2i size = _render_target_get_sdf_rect(rt).size; + + glGenTextures(1, &rt->sdf_texture_write); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, rt->sdf_texture_write); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, size.width, size.height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + glGenFramebuffers(1, &rt->sdf_texture_write_fb); + glBindFramebuffer(GL_FRAMEBUFFER, rt->sdf_texture_write_fb); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->sdf_texture_write, 0); + + int scale; + switch (rt->sdf_scale) { + case RS::VIEWPORT_SDF_SCALE_100_PERCENT: { + scale = 100; + } break; + case RS::VIEWPORT_SDF_SCALE_50_PERCENT: { + scale = 50; + } break; + case RS::VIEWPORT_SDF_SCALE_25_PERCENT: { + scale = 25; + } break; + default: { + scale = 100; + } break; + } + + rt->process_size = size * scale / 100; + rt->process_size.x = MAX(rt->process_size.x, 1); + rt->process_size.y = MAX(rt->process_size.y, 1); + + glGenTextures(2, rt->sdf_texture_process); + glBindTexture(GL_TEXTURE_2D, rt->sdf_texture_process[0]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16I, rt->process_size.width, rt->process_size.height, 0, GL_RG_INTEGER, GL_SHORT, nullptr); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + glBindTexture(GL_TEXTURE_2D, rt->sdf_texture_process[1]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16I, rt->process_size.width, rt->process_size.height, 0, GL_RG_INTEGER, GL_SHORT, nullptr); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + glGenTextures(1, &rt->sdf_texture_read); + glBindTexture(GL_TEXTURE_2D, rt->sdf_texture_read); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, rt->process_size.width, rt->process_size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); +} + +void TextureStorage::_render_target_clear_sdf(RenderTarget *rt) { + if (rt->sdf_texture_write_fb != 0) { + glDeleteTextures(1, &rt->sdf_texture_read); + glDeleteTextures(1, &rt->sdf_texture_write); + glDeleteTextures(2, rt->sdf_texture_process); + glDeleteFramebuffers(1, &rt->sdf_texture_write_fb); + rt->sdf_texture_read = 0; + rt->sdf_texture_write = 0; + rt->sdf_texture_process[0] = 0; + rt->sdf_texture_process[1] = 0; + rt->sdf_texture_write_fb = 0; + } +} + +GLuint TextureStorage::render_target_get_sdf_framebuffer(RID p_render_target) { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND_V(!rt, 0); + + if (rt->sdf_texture_write_fb == 0) { + _render_target_allocate_sdf(rt); + } + + return rt->sdf_texture_write_fb; +} +void TextureStorage::render_target_sdf_process(RID p_render_target) { + CopyEffects *copy_effects = CopyEffects::get_singleton(); + + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_COND(!rt); + ERR_FAIL_COND(rt->sdf_texture_write_fb == 0); + + Rect2i r = _render_target_get_sdf_rect(rt); + + Size2i size = r.size; + int32_t shift = 0; + + bool shrink = false; + + switch (rt->sdf_scale) { + case RS::VIEWPORT_SDF_SCALE_50_PERCENT: { + size[0] >>= 1; + size[1] >>= 1; + shift = 1; + shrink = true; + } break; + case RS::VIEWPORT_SDF_SCALE_25_PERCENT: { + size[0] >>= 2; + size[1] >>= 2; + shift = 2; + shrink = true; + } break; + default: { + }; + } + + GLuint temp_fb; + glGenFramebuffers(1, &temp_fb); + glBindFramebuffer(GL_FRAMEBUFFER, temp_fb); + + // Load + CanvasSdfShaderGLES3::ShaderVariant variant = shrink ? CanvasSdfShaderGLES3::MODE_LOAD_SHRINK : CanvasSdfShaderGLES3::MODE_LOAD; + bool success = sdf_shader.shader.version_bind_shader(sdf_shader.shader_version, variant); + if (!success) { + return; + } + + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::BASE_SIZE, r.size, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::SIZE, size, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::STRIDE, 0, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::SHIFT, shift, sdf_shader.shader_version, variant); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, rt->sdf_texture_write); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->sdf_texture_process[0], 0); + glViewport(0, 0, size.width, size.height); + glEnable(GL_SCISSOR_TEST); + glScissor(0, 0, size.width, size.height); + + copy_effects->draw_screen_triangle(); + + // Process + + int stride = nearest_power_of_2_templated(MAX(size.width, size.height) / 2); + + variant = CanvasSdfShaderGLES3::MODE_PROCESS; + success = sdf_shader.shader.version_bind_shader(sdf_shader.shader_version, variant); + if (!success) { + return; + } + + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::BASE_SIZE, r.size, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::SIZE, size, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::STRIDE, stride, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::SHIFT, shift, sdf_shader.shader_version, variant); + + bool swap = false; + + //jumpflood + while (stride > 0) { + glBindTexture(GL_TEXTURE_2D, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->sdf_texture_process[swap ? 0 : 1], 0); + glBindTexture(GL_TEXTURE_2D, rt->sdf_texture_process[swap ? 1 : 0]); + + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::STRIDE, stride, sdf_shader.shader_version, variant); + + copy_effects->draw_screen_triangle(); + + stride /= 2; + swap = !swap; + } + + // Store + variant = shrink ? CanvasSdfShaderGLES3::MODE_STORE_SHRINK : CanvasSdfShaderGLES3::MODE_STORE; + success = sdf_shader.shader.version_bind_shader(sdf_shader.shader_version, variant); + if (!success) { + return; + } + + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::BASE_SIZE, r.size, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::SIZE, size, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::STRIDE, stride, sdf_shader.shader_version, variant); + sdf_shader.shader.version_set_uniform(CanvasSdfShaderGLES3::SHIFT, shift, sdf_shader.shader_version, variant); + + glBindTexture(GL_TEXTURE_2D, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->sdf_texture_read, 0); + glBindTexture(GL_TEXTURE_2D, rt->sdf_texture_process[swap ? 1 : 0]); + + copy_effects->draw_screen_triangle(); + + glBindTexture(GL_TEXTURE_2D, 0); + glBindFramebuffer(GL_FRAMEBUFFER, system_fbo); + glDeleteFramebuffers(1, &temp_fb); + glDisable(GL_SCISSOR_TEST); } void TextureStorage::render_target_copy_to_back_buffer(RID p_render_target, const Rect2i &p_region, bool p_gen_mipmaps) { diff --git a/drivers/gles3/storage/texture_storage.h b/drivers/gles3/storage/texture_storage.h index 7e083e48e8..169c50638d 100644 --- a/drivers/gles3/storage/texture_storage.h +++ b/drivers/gles3/storage/texture_storage.h @@ -39,6 +39,8 @@ #include "servers/rendering/renderer_compositor.h" #include "servers/rendering/storage/texture_storage.h" +#include "../shaders/canvas_sdf.glsl.gen.h" + // This must come first to avoid windows.h mess #include "platform_config.h" #ifndef OPENGL_INCLUDE_H @@ -84,18 +86,8 @@ namespace GLES3 { #define _GL_TEXTURE_EXTERNAL_OES 0x8D65 -#ifdef GLES_OVER_GL -#define _GL_HALF_FLOAT_OES 0x140B -#else -#define _GL_HALF_FLOAT_OES 0x8D61 -#endif - #define _EXT_TEXTURE_CUBE_MAP_SEAMLESS 0x884F -#define _RED_OES 0x1903 - -#define _DEPTH_COMPONENT24_OES 0x81A6 - #ifndef GLES_OVER_GL #define glClearDepth glClearDepthf #endif //!GLES_OVER_GL @@ -128,26 +120,16 @@ struct CanvasTexture { RS::CanvasItemTextureRepeat texture_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT; }; -/* CANVAS SHADOW */ - -struct CanvasLightShadow { - RID self; - int size; - int height; - GLuint fbo; - GLuint depth; - GLuint distance; //for older devices -}; - struct RenderTarget; struct Texture { RID self; bool is_proxy = false; + bool is_external = false; bool is_render_target = false; - RID proxy_to = RID(); + RID proxy_to; Vector<RID> proxies; String path; @@ -206,6 +188,7 @@ struct Texture { void copy_from(const Texture &o) { proxy_to = o.proxy_to; is_proxy = o.is_proxy; + is_external = o.is_external; width = o.width; height = o.height; alloc_width = o.alloc_width; @@ -324,10 +307,12 @@ private: struct RenderTarget { Point2i position = Point2i(0, 0); Size2i size = Size2i(0, 0); + uint32_t view_count = 1; int mipmap_count = 1; RID self; GLuint fbo = 0; GLuint color = 0; + GLuint depth = 0; GLuint backbuffer_fbo = 0; GLuint backbuffer = 0; @@ -336,12 +321,37 @@ struct RenderTarget { GLuint color_type = GL_UNSIGNED_BYTE; Image::Format image_format = Image::FORMAT_RGBA8; + GLuint sdf_texture_write = 0; + GLuint sdf_texture_write_fb = 0; + GLuint sdf_texture_process[2] = { 0, 0 }; + GLuint sdf_texture_read = 0; + RS::ViewportSDFOversize sdf_oversize = RS::VIEWPORT_SDF_OVERSIZE_120_PERCENT; + RS::ViewportSDFScale sdf_scale = RS::VIEWPORT_SDF_SCALE_50_PERCENT; + Size2i process_size; + bool sdf_enabled = false; + bool is_transparent = false; bool direct_to_screen = false; bool used_in_frame = false; RS::ViewportMSAA msaa = RS::VIEWPORT_MSAA_DISABLED; + struct RTOverridden { + bool is_overridden = false; + RID color; + RID depth; + RID velocity; + + struct FBOCacheEntry { + GLuint fbo; + GLuint color; + GLuint depth; + Size2i size; + Vector<GLuint> allocated_textures; + }; + RBMap<uint32_t, FBOCacheEntry> fbo_cache; + } overridden; + RID texture; Color clear_color = Color(1, 1, 1, 1); @@ -361,16 +371,44 @@ private: RID_Owner<CanvasTexture, true> canvas_texture_owner; - /* CANVAS SHADOW */ - - RID_PtrOwner<CanvasLightShadow> canvas_light_shadow_owner; - /* Texture API */ mutable RID_Owner<Texture> texture_owner; Ref<Image> _get_gl_image_and_format(const Ref<Image> &p_image, Image::Format p_format, Image::Format &r_real_format, GLenum &r_gl_format, GLenum &r_gl_internal_format, GLenum &r_gl_type, bool &r_compressed, bool p_force_decompress) const; + /* TEXTURE ATLAS API */ + + struct TextureAtlas { + struct Texture { + int users; + Rect2 uv_rect; + }; + + struct SortItem { + RID texture; + Size2i pixel_size; + Size2i size; + Point2i pos; + + bool operator<(const SortItem &p_item) const { + //sort larger to smaller + if (size.height == p_item.size.height) { + return size.width > p_item.size.width; + } else { + return size.height > p_item.size.height; + } + } + }; + + HashMap<RID, Texture> textures; + bool dirty = true; + + GLuint texture = 0; + GLuint framebuffer = 0; + Size2i size; + } texture_atlas; + /* Render Target API */ mutable RID_Owner<RenderTarget> render_target_owner; @@ -378,6 +416,14 @@ private: void _clear_render_target(RenderTarget *rt); void _update_render_target(RenderTarget *rt); void _create_render_target_backbuffer(RenderTarget *rt); + void _render_target_allocate_sdf(RenderTarget *rt); + void _render_target_clear_sdf(RenderTarget *rt); + Rect2i _render_target_get_sdf_rect(const RenderTarget *rt) const; + + struct RenderTargetSDF { + CanvasSdfShaderGLES3 shader; + RID shader_version; + } sdf_shader; public: static TextureStorage *get_singleton(); @@ -404,10 +450,6 @@ public: virtual void canvas_texture_set_texture_filter(RID p_item, RS::CanvasItemTextureFilter p_filter) override; virtual void canvas_texture_set_texture_repeat(RID p_item, RS::CanvasItemTextureRepeat p_repeat) override; - /* CANVAS SHADOW */ - - RID canvas_light_shadow_buffer_create(int p_width); - /* Texture API */ Texture *get_texture(RID p_rid) { @@ -431,6 +473,8 @@ public: virtual void texture_3d_initialize(RID p_texture, Image::Format, int p_width, int p_height, int p_depth, bool p_mipmaps, const Vector<Ref<Image>> &p_data) override; virtual void texture_proxy_initialize(RID p_texture, RID p_base) override; //all slices, then all the mipmaps, must be coherent + RID texture_create_external(Texture::Type p_type, Image::Format p_format, unsigned int p_image, int p_width, int p_height, int p_depth, int p_layers, RS::TextureLayeredType p_layered_type = RS::TEXTURE_LAYERED_2D_ARRAY); + virtual void texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer = 0) override; virtual void texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data) override{}; virtual void texture_proxy_update(RID p_proxy, RID p_base) override; @@ -461,6 +505,8 @@ public: virtual Size2 texture_size_with_proxy(RID p_proxy) override; + virtual RID texture_get_rd_texture_rid(RID p_texture, bool p_srgb = false) const override; + void texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer = 0); void texture_set_data_partial(RID p_texture, const Ref<Image> &p_image, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y, int p_dst_mip, int p_layer = 0); //Ref<Image> texture_get_data(RID p_texture, int p_layer = 0) const; @@ -473,6 +519,25 @@ public: void texture_bind(RID p_texture, uint32_t p_texture_no); RID texture_create_radiance_cubemap(RID p_source, int p_resolution = -1) const; + /* TEXTURE ATLAS API */ + + void update_texture_atlas(); + + GLuint texture_atlas_get_texture() const; + _FORCE_INLINE_ Rect2 texture_atlas_get_texture_rect(RID p_texture) { + TextureAtlas::Texture *t = texture_atlas.textures.getptr(p_texture); + if (!t) { + return Rect2(); + } + + return t->uv_rect; + } + + void texture_add_to_texture_atlas(RID p_texture); + void texture_remove_from_texture_atlas(RID p_texture); + void texture_atlas_mark_dirty_on_texture(RID p_texture); + void texture_atlas_remove_texture(RID p_texture); + /* DECAL API */ virtual RID decal_allocate() override; @@ -534,9 +599,13 @@ public: void render_target_disable_clear_request(RID p_render_target) override; void render_target_do_clear_request(RID p_render_target) override; - void render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale) override; - Rect2i render_target_get_sdf_rect(RID p_render_target) const override; - void render_target_mark_sdf_enabled(RID p_render_target, bool p_enabled) override; + virtual void render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale) override; + virtual Rect2i render_target_get_sdf_rect(RID p_render_target) const override; + GLuint render_target_get_sdf_texture(RID p_render_target); + GLuint render_target_get_sdf_framebuffer(RID p_render_target); + void render_target_sdf_process(RID p_render_target); + virtual void render_target_mark_sdf_enabled(RID p_render_target, bool p_enabled) override; + bool render_target_is_sdf_enabled(RID p_render_target) const; void render_target_copy_to_back_buffer(RID p_render_target, const Rect2i &p_region, bool p_gen_mipmaps); void render_target_clear_back_buffer(RID p_render_target, const Rect2i &p_region, const Color &p_color); @@ -547,12 +616,10 @@ public: virtual void render_target_set_vrs_texture(RID p_render_target, RID p_texture) override {} virtual RID render_target_get_vrs_texture(RID p_render_target) const override { return RID(); } - virtual void render_target_set_override_color(RID p_render_target, RID p_texture) override {} - virtual RID render_target_get_override_color(RID p_render_target) const override { return RID(); } - virtual void render_target_set_override_depth(RID p_render_target, RID p_texture) override {} - virtual RID render_target_get_override_depth(RID p_render_target) const override { return RID(); } - virtual void render_target_set_override_velocity(RID p_render_target, RID p_texture) override {} - virtual RID render_target_get_override_velocity(RID p_render_target) const override { return RID(); } + virtual void render_target_set_override(RID p_render_target, RID p_color_texture, RID p_depth_texture, RID p_velocity_texture) override; + virtual RID render_target_get_override_color(RID p_render_target) const override; + virtual RID render_target_get_override_depth(RID p_render_target) const override; + virtual RID render_target_get_override_velocity(RID p_render_target) const override; virtual RID render_target_get_texture(RID p_render_target) override; diff --git a/drivers/gles3/storage/utilities.cpp b/drivers/gles3/storage/utilities.cpp index 16bacf1829..fe900c7cfb 100644 --- a/drivers/gles3/storage/utilities.cpp +++ b/drivers/gles3/storage/utilities.cpp @@ -38,20 +38,44 @@ #include "particles_storage.h" #include "texture_storage.h" +#include "servers/rendering/rendering_server_globals.h" + using namespace GLES3; Utilities *Utilities::singleton = nullptr; Utilities::Utilities() { singleton = this; + frame = 0; + for (int i = 0; i < FRAME_COUNT; i++) { + frames[i].index = 0; + glGenQueries(max_timestamp_query_elements, frames[i].queries); + + frames[i].timestamp_names.resize(max_timestamp_query_elements); + frames[i].timestamp_cpu_values.resize(max_timestamp_query_elements); + frames[i].timestamp_count = 0; + + frames[i].timestamp_result_names.resize(max_timestamp_query_elements); + frames[i].timestamp_cpu_result_values.resize(max_timestamp_query_elements); + frames[i].timestamp_result_values.resize(max_timestamp_query_elements); + frames[i].timestamp_result_count = 0; + } } Utilities::~Utilities() { singleton = nullptr; + for (int i = 0; i < FRAME_COUNT; i++) { + glDeleteQueries(max_timestamp_query_elements, frames[i].queries); + } } Vector<uint8_t> Utilities::buffer_get_data(GLenum p_target, GLuint p_buffer, uint32_t p_buffer_size) { Vector<uint8_t> ret; + + if (p_buffer_size == 0) { + return ret; + } + ret.resize(p_buffer_size); glBindBuffer(p_target, p_buffer); @@ -84,6 +108,10 @@ RS::InstanceType Utilities::get_base_type(RID p_rid) const { return RS::INSTANCE_LIGHT; } else if (GLES3::LightStorage::get_singleton()->owns_lightmap(p_rid)) { return RS::INSTANCE_LIGHTMAP; + } else if (GLES3::ParticlesStorage::get_singleton()->owns_particles(p_rid)) { + return RS::INSTANCE_PARTICLES; + } else if (GLES3::ParticlesStorage::get_singleton()->owns_particles_collision(p_rid)) { + return RS::INSTANCE_PARTICLES_COLLISION; } return RS::INSTANCE_NONE; } @@ -119,53 +147,18 @@ bool Utilities::free(RID p_rid) { } else if (GLES3::LightStorage::get_singleton()->owns_lightmap(p_rid)) { GLES3::LightStorage::get_singleton()->lightmap_free(p_rid); return true; - } else { - return false; - } - /* - else if (reflection_probe_owner.owns(p_rid)) { - // delete the texture - ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_rid); - reflection_probe->instance_remove_deps(); - - reflection_probe_owner.free(p_rid); - memdelete(reflection_probe); - - return true; - } else if (lightmap_capture_data_owner.owns(p_rid)) { - // delete the texture - LightmapCapture *lightmap_capture = lightmap_capture_data_owner.get_or_null(p_rid); - lightmap_capture->instance_remove_deps(); - - lightmap_capture_data_owner.free(p_rid); - memdelete(lightmap_capture); + } else if (GLES3::ParticlesStorage::get_singleton()->owns_particles(p_rid)) { + GLES3::ParticlesStorage::get_singleton()->particles_free(p_rid); return true; - - } else if (canvas_occluder_owner.owns(p_rid)) { - CanvasOccluder *co = canvas_occluder_owner.get_or_null(p_rid); - if (co->index_id) { - glDeleteBuffers(1, &co->index_id); - } - if (co->vertex_id) { - glDeleteBuffers(1, &co->vertex_id); - } - - canvas_occluder_owner.free(p_rid); - memdelete(co); - + } else if (GLES3::ParticlesStorage::get_singleton()->owns_particles_collision(p_rid)) { + GLES3::ParticlesStorage::get_singleton()->particles_collision_free(p_rid); return true; - - } else if (canvas_light_shadow_owner.owns(p_rid)) { - CanvasLightShadow *cls = canvas_light_shadow_owner.get_or_null(p_rid); - glDeleteFramebuffers(1, &cls->fbo); - glDeleteRenderbuffers(1, &cls->depth); - glDeleteTextures(1, &cls->distance); - canvas_light_shadow_owner.free(p_rid); - memdelete(cls); - + } else if (GLES3::ParticlesStorage::get_singleton()->owns_particles_collision_instance(p_rid)) { + GLES3::ParticlesStorage::get_singleton()->particles_collision_instance_free(p_rid); return true; + } else { + return false; } - */ } /* DEPENDENCIES */ @@ -183,6 +176,12 @@ void Utilities::base_update_dependency(RID p_base, DependencyTracker *p_instance } else if (LightStorage::get_singleton()->owns_light(p_base)) { Light *l = LightStorage::get_singleton()->get_light(p_base); p_instance->update_dependency(&l->dependency); + } else if (ParticlesStorage::get_singleton()->owns_particles(p_base)) { + Dependency *dependency = ParticlesStorage::get_singleton()->particles_get_dependency(p_base); + p_instance->update_dependency(dependency); + } else if (ParticlesStorage::get_singleton()->owns_particles_collision(p_base)) { + Dependency *dependency = ParticlesStorage::get_singleton()->particles_collision_get_dependency(p_base); + p_instance->update_dependency(dependency); } } @@ -213,95 +212,78 @@ void Utilities::visibility_notifier_call(RID p_notifier, bool p_enter, bool p_de /* TIMING */ -//void Utilities::render_info_begin_capture() { -// info.snap = info.render; -//} - -//void Utilities::render_info_end_capture() { -// info.snap.object_count = info.render.object_count - info.snap.object_count; -// info.snap.draw_call_count = info.render.draw_call_count - info.snap.draw_call_count; -// info.snap.material_switch_count = info.render.material_switch_count - info.snap.material_switch_count; -// info.snap.surface_switch_count = info.render.surface_switch_count - info.snap.surface_switch_count; -// info.snap.shader_rebind_count = info.render.shader_rebind_count - info.snap.shader_rebind_count; -// info.snap.vertices_count = info.render.vertices_count - info.snap.vertices_count; -// info.snap._2d_item_count = info.render._2d_item_count - info.snap._2d_item_count; -// info.snap._2d_draw_call_count = info.render._2d_draw_call_count - info.snap._2d_draw_call_count; -//} - -//int Utilities::get_captured_render_info(RS::RenderInfo p_info) { -// switch (p_info) { -// case RS::INFO_OBJECTS_IN_FRAME: { -// return info.snap.object_count; -// } break; -// case RS::INFO_VERTICES_IN_FRAME: { -// return info.snap.vertices_count; -// } break; -// case RS::INFO_MATERIAL_CHANGES_IN_FRAME: { -// return info.snap.material_switch_count; -// } break; -// case RS::INFO_SHADER_CHANGES_IN_FRAME: { -// return info.snap.shader_rebind_count; -// } break; -// case RS::INFO_SURFACE_CHANGES_IN_FRAME: { -// return info.snap.surface_switch_count; -// } break; -// case RS::INFO_DRAW_CALLS_IN_FRAME: { -// return info.snap.draw_call_count; -// } break; -// /* -// case RS::INFO_2D_ITEMS_IN_FRAME: { -// return info.snap._2d_item_count; -// } break; -// case RS::INFO_2D_DRAW_CALLS_IN_FRAME: { -// return info.snap._2d_draw_call_count; -// } break; -// */ -// default: { -// return get_render_info(p_info); -// } -// } -//} - -//int Utilities::get_render_info(RS::RenderInfo p_info) { -// switch (p_info) { -// case RS::INFO_OBJECTS_IN_FRAME: -// return info.render_final.object_count; -// case RS::INFO_VERTICES_IN_FRAME: -// return info.render_final.vertices_count; -// case RS::INFO_MATERIAL_CHANGES_IN_FRAME: -// return info.render_final.material_switch_count; -// case RS::INFO_SHADER_CHANGES_IN_FRAME: -// return info.render_final.shader_rebind_count; -// case RS::INFO_SURFACE_CHANGES_IN_FRAME: -// return info.render_final.surface_switch_count; -// case RS::INFO_DRAW_CALLS_IN_FRAME: -// return info.render_final.draw_call_count; -// /* -// case RS::INFO_2D_ITEMS_IN_FRAME: -// return info.render_final._2d_item_count; -// case RS::INFO_2D_DRAW_CALLS_IN_FRAME: -// return info.render_final._2d_draw_call_count; -//*/ -// case RS::INFO_USAGE_VIDEO_MEM_TOTAL: -// return 0; //no idea -// case RS::INFO_VIDEO_MEM_USED: -// return info.vertex_mem + info.texture_mem; -// case RS::INFO_TEXTURE_MEM_USED: -// return info.texture_mem; -// case RS::INFO_VERTEX_MEM_USED: -// return info.vertex_mem; -// default: -// return 0; //no idea either -// } -//} +void Utilities::capture_timestamps_begin() { + capture_timestamp("Frame Begin"); +} + +void Utilities::capture_timestamp(const String &p_name) { + ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); + +#ifdef GLES_OVER_GL + glQueryCounter(frames[frame].queries[frames[frame].timestamp_count], GL_TIMESTAMP); +#endif + + frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name; + frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec(); + frames[frame].timestamp_count++; +} + +void Utilities::_capture_timestamps_begin() { + // frame is incremented at the end of the frame so this gives us the queries for frame - 2. By then they should be ready. + if (frames[frame].timestamp_count) { +#ifdef GLES_OVER_GL + for (uint32_t i = 0; i < frames[frame].timestamp_count; i++) { + uint64_t temp = 0; + glGetQueryObjectui64v(frames[frame].queries[i], GL_QUERY_RESULT, &temp); + frames[frame].timestamp_result_values[i] = temp; + } +#endif + SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names); + SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values); + } + + frames[frame].timestamp_result_count = frames[frame].timestamp_count; + frames[frame].timestamp_count = 0; + frames[frame].index = Engine::get_singleton()->get_frames_drawn(); + capture_timestamp("Internal Begin"); +} + +void Utilities::capture_timestamps_end() { + capture_timestamp("Internal End"); + frame = (frame + 1) % FRAME_COUNT; +} + +uint32_t Utilities::get_captured_timestamps_count() const { + return frames[frame].timestamp_result_count; +} + +uint64_t Utilities::get_captured_timestamps_frame() const { + return frames[frame].index; +} + +uint64_t Utilities::get_captured_timestamp_gpu_time(uint32_t p_index) const { + ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0); + return frames[frame].timestamp_result_values[p_index]; +} + +uint64_t Utilities::get_captured_timestamp_cpu_time(uint32_t p_index) const { + ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0); + return frames[frame].timestamp_cpu_result_values[p_index]; +} + +String Utilities::get_captured_timestamp_name(uint32_t p_index) const { + ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, String()); + return frames[frame].timestamp_result_names[p_index]; +} /* MISC */ void Utilities::update_dirty_resources() { MaterialStorage::get_singleton()->_update_global_shader_uniforms(); MaterialStorage::get_singleton()->_update_queued_materials(); - //MeshStorage::get_singleton()->_update_dirty_skeletons(); + MeshStorage::get_singleton()->_update_dirty_skeletons(); MeshStorage::get_singleton()->_update_dirty_multimeshes(); + TextureStorage::get_singleton()->update_texture_atlas(); } void Utilities::set_debug_generate_wireframes(bool p_generate) { @@ -355,4 +337,13 @@ String Utilities::get_video_adapter_api_version() const { return (const char *)glGetString(GL_VERSION); } +Size2i Utilities::get_maximum_viewport_size() const { + Config *config = Config::get_singleton(); + if (!config) { + return Size2i(); + } + + return Size2i(config->max_viewport_size[0], config->max_viewport_size[1]); +} + #endif // GLES3_ENABLED diff --git a/drivers/gles3/storage/utilities.h b/drivers/gles3/storage/utilities.h index e054f2f816..55a875958e 100644 --- a/drivers/gles3/storage/utilities.h +++ b/drivers/gles3/storage/utilities.h @@ -79,62 +79,35 @@ public: /* TIMING */ - struct Info { - uint64_t texture_mem = 0; - uint64_t vertex_mem = 0; - - struct Render { - uint32_t object_count; - uint32_t draw_call_count; - uint32_t material_switch_count; - uint32_t surface_switch_count; - uint32_t shader_rebind_count; - uint32_t vertices_count; - uint32_t _2d_item_count; - uint32_t _2d_draw_call_count; - - void reset() { - object_count = 0; - draw_call_count = 0; - material_switch_count = 0; - surface_switch_count = 0; - shader_rebind_count = 0; - vertices_count = 0; - _2d_item_count = 0; - _2d_draw_call_count = 0; - } - } render, render_final, snap; - - Info() { - render.reset(); - render_final.reset(); - } - - } info; - - virtual void capture_timestamps_begin() override {} - virtual void capture_timestamp(const String &p_name) override {} - virtual uint32_t get_captured_timestamps_count() const override { - return 0; - } - virtual uint64_t get_captured_timestamps_frame() const override { - return 0; - } - virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const override { - return 0; - } - virtual uint64_t get_captured_timestamp_cpu_time(uint32_t p_index) const override { - return 0; - } - virtual String get_captured_timestamp_name(uint32_t p_index) const override { - return String(); - } - - // void render_info_begin_capture() override; - // void render_info_end_capture() override; - // int get_captured_render_info(RS::RenderInfo p_info) override; - - // int get_render_info(RS::RenderInfo p_info) override; +#define MAX_QUERIES 256 +#define FRAME_COUNT 3 + + struct Frame { + GLuint queries[MAX_QUERIES]; + TightLocalVector<String> timestamp_names; + TightLocalVector<uint64_t> timestamp_cpu_values; + uint32_t timestamp_count = 0; + TightLocalVector<String> timestamp_result_names; + TightLocalVector<uint64_t> timestamp_cpu_result_values; + TightLocalVector<uint64_t> timestamp_result_values; + uint32_t timestamp_result_count = 0; + uint64_t index = 0; + }; + + const uint32_t max_timestamp_query_elements = MAX_QUERIES; + + Frame frames[FRAME_COUNT]; // Frames for capturing timestamps. We use 3 so we don't need to wait for commands to complete + uint32_t frame = 0; + + virtual void capture_timestamps_begin() override; + virtual void capture_timestamp(const String &p_name) override; + virtual uint32_t get_captured_timestamps_count() const override; + virtual uint64_t get_captured_timestamps_frame() const override; + virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const override; + virtual uint64_t get_captured_timestamp_cpu_time(uint32_t p_index) const override; + virtual String get_captured_timestamp_name(uint32_t p_index) const override; + void _capture_timestamps_begin(); + void capture_timestamps_end(); /* MISC */ @@ -150,6 +123,8 @@ public: virtual String get_video_adapter_vendor() const override; virtual RenderingDevice::DeviceType get_video_adapter_type() const override; virtual String get_video_adapter_api_version() const override; + + virtual Size2i get_maximum_viewport_size() const override; }; } // namespace GLES3 diff --git a/drivers/png/png_driver_common.cpp b/drivers/png/png_driver_common.cpp index bc4bb3782b..79641464d8 100644 --- a/drivers/png/png_driver_common.cpp +++ b/drivers/png/png_driver_common.cpp @@ -119,7 +119,7 @@ Error png_to_image(const uint8_t *p_source, size_t p_size, bool p_force_linear, ERR_FAIL_COND_V(!success, ERR_FILE_CORRUPT); //print_line("png width: "+itos(png_img.width)+" height: "+itos(png_img.height)); - p_image->create(png_img.width, png_img.height, false, dest_format, buffer); + p_image->set_data(png_img.width, png_img.height, false, dest_format, buffer); return OK; } diff --git a/drivers/unix/net_socket_posix.cpp b/drivers/unix/net_socket_posix.cpp index 72ae609fb4..c6b327eeee 100644 --- a/drivers/unix/net_socket_posix.cpp +++ b/drivers/unix/net_socket_posix.cpp @@ -179,8 +179,8 @@ NetSocketPosix::~NetSocketPosix() { close(); } -// Silent a warning reported in #27594 - +// Silence a warning reported in GH-27594. +// EAGAIN and EWOULDBLOCK have the same value on most platforms, but it's not guaranteed. #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wlogical-op" diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index 10d65b83db..b02a100784 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -145,6 +145,10 @@ void OS_Unix::finalize_core() { NetSocketPosix::cleanup(); } +Vector<String> OS_Unix::get_video_adapter_driver_info() const { + return Vector<String>(); +} + String OS_Unix::get_stdin_string(bool p_block) { if (p_block) { char buff[1024]; @@ -501,11 +505,11 @@ bool OS_Unix::set_environment(const String &p_var, const String &p_value) const } String OS_Unix::get_user_data_dir() const { - String appname = get_safe_dir_name(ProjectSettings::get_singleton()->get("application/config/name")); + String appname = get_safe_dir_name(GLOBAL_GET("application/config/name")); if (!appname.is_empty()) { - bool use_custom_dir = ProjectSettings::get_singleton()->get("application/config/use_custom_user_dir"); + bool use_custom_dir = GLOBAL_GET("application/config/use_custom_user_dir"); if (use_custom_dir) { - String custom_dir = get_safe_dir_name(ProjectSettings::get_singleton()->get("application/config/custom_user_dir_name"), true); + String custom_dir = get_safe_dir_name(GLOBAL_GET("application/config/custom_user_dir_name"), true); if (custom_dir.is_empty()) { custom_dir = appname; } @@ -561,7 +565,7 @@ String OS_Unix::get_executable_path() const { WARN_PRINT("MAXPATHLEN is too small"); } - String path(resolved_path); + String path = String::utf8(resolved_path); delete[] resolved_path; return path; diff --git a/drivers/unix/os_unix.h b/drivers/unix/os_unix.h index fce962e32c..ce06a52a95 100644 --- a/drivers/unix/os_unix.h +++ b/drivers/unix/os_unix.h @@ -51,6 +51,8 @@ protected: public: OS_Unix(); + virtual Vector<String> get_video_adapter_driver_info() const override; + virtual String get_stdin_string(bool p_block) override; virtual Error get_entropy(uint8_t *r_buffer, int p_bytes) override; diff --git a/drivers/unix/thread_posix.cpp b/drivers/unix/thread_posix.cpp index f6adbee108..5154feb478 100644 --- a/drivers/unix/thread_posix.cpp +++ b/drivers/unix/thread_posix.cpp @@ -28,7 +28,7 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ -#if defined(UNIX_ENABLED) || defined(PTHREAD_ENABLED) +#if defined(UNIX_ENABLED) #include "thread_posix.h" @@ -73,4 +73,4 @@ void init_thread_posix() { Thread::_set_platform_functions({ .set_name = set_name }); } -#endif // UNIX_ENABLED || PTHREAD_ENABLED +#endif // UNIX_ENABLED diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 9b193056c2..7f5bac30f1 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -47,7 +47,7 @@ static const uint32_t SMALL_ALLOCATION_MAX_SIZE = 4096; // Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values). -RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, uint32_t p_post_barrier) { +RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, BitField<BarrierMask> p_post_barrier) { Buffer *buffer = nullptr; if (vertex_buffer_owner.owns(p_buffer)) { buffer = vertex_buffer_owner.get_or_null(p_buffer); @@ -55,11 +55,11 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; r_access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; if (buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; } @@ -69,20 +69,20 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID r_access_mask |= VK_ACCESS_INDEX_READ_BIT; buffer = index_buffer_owner.get_or_null(p_buffer); } else if (uniform_buffer_owner.owns(p_buffer)) { - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; } r_access_mask |= VK_ACCESS_UNIFORM_READ_BIT; buffer = uniform_buffer_owner.get_or_null(p_buffer); } else if (texture_buffer_owner.owns(p_buffer)) { - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT; } @@ -90,11 +90,11 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID buffer = &texture_buffer_owner.get_or_null(p_buffer)->buffer; } else if (storage_buffer_owner.owns(p_buffer)) { buffer = storage_buffer_owner.get_or_null(p_buffer); - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } @@ -1655,9 +1655,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_create_info.pNext = nullptr; image_create_info.flags = 0; -#ifndef _MSC_VER -#warning TODO check for support via RenderingDevice to enable on mobile when possible -#endif + // TODO: Check for support via RenderingDevice to enable on mobile when possible. #ifndef ANDROID_ENABLED @@ -2011,7 +2009,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T if (p_data.size()) { for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) { - _texture_update(id, i, p_data[i], RD::BARRIER_MASK_ALL, true); + _texture_update(id, i, p_data[i], RD::BARRIER_MASK_ALL_BARRIERS, true); } } return id; @@ -2162,14 +2160,35 @@ RID RenderingDeviceVulkan::texture_create_from_extension(TextureType p_type, Dat texture.height = p_height; texture.depth = p_depth; texture.layers = p_layers; - texture.mipmaps = 0; // Maybe make this settable too? + texture.mipmaps = 1; texture.usage_flags = p_flags; texture.base_mipmap = 0; texture.base_layer = 0; texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_UNORM); texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB); - // Do we need to do something with texture.layout? + // Set base layout based on usage priority. + + if (texture.usage_flags & TEXTURE_USAGE_SAMPLING_BIT) { + // First priority, readable. + texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + } else if (texture.usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + // Second priority, storage. + + texture.layout = VK_IMAGE_LAYOUT_GENERAL; + + } else if (texture.usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + // Third priority, color or depth. + + texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + } else if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + } else { + texture.layout = VK_IMAGE_LAYOUT_GENERAL; + } if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; @@ -2395,7 +2414,7 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p return id; } -Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier) { return _texture_update(p_texture, p_layer, p_data, p_post_barrier, false); } @@ -2415,7 +2434,7 @@ static _ALWAYS_INLINE_ void _copy_region(uint8_t const *__restrict p_src, uint8_ } } -Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier, bool p_use_setup_queue) { +Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier, bool p_use_setup_queue) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG((draw_list || compute_list) && !p_use_setup_queue, ERR_INVALID_PARAMETER, @@ -2589,15 +2608,15 @@ Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, co { uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -2850,7 +2869,7 @@ Size2i RenderingDeviceVulkan::texture_size(RID p_texture) { return Size2i(tex->width, tex->height); } -Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_from_texture); @@ -2975,15 +2994,15 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -3045,7 +3064,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, return OK; } -Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_from_texture); @@ -3153,15 +3172,15 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -3216,7 +3235,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID return OK; } -Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_texture); @@ -3289,15 +3308,15 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } @@ -3336,7 +3355,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, return OK; } -bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const { +bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_format, BitField<RenderingDevice::TextureUsageBits> p_usage) const { ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, false); _THREAD_SAFE_METHOD_ @@ -3346,34 +3365,34 @@ bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_f vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), vulkan_formats[p_format], &properties); VkFormatFeatureFlags flags; - if (p_usage & TEXTURE_USAGE_CPU_READ_BIT) { + if (p_usage.has_flag(TEXTURE_USAGE_CPU_READ_BIT)) { flags = properties.linearTilingFeatures; } else { flags = properties.optimalTilingFeatures; } - if (p_usage & TEXTURE_USAGE_SAMPLING_BIT && !(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_SAMPLING_BIT) && !(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) && !(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && !(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_STORAGE_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_STORAGE_BIT) && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { return false; } - if (p_usage & TEXTURE_USAGE_STORAGE_ATOMIC_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) { + if (p_usage.has_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT) && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) { return false; } // Validation via VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR fails if VRS attachment is not supported. - if (p_usage & TEXTURE_USAGE_VRS_ATTACHMENT_BIT && p_format != DATA_FORMAT_R8_UINT) { + if (p_usage.has_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && p_format != DATA_FORMAT_R8_UINT) { return false; } @@ -3403,6 +3422,16 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF LocalVector<int32_t> attachment_last_pass; attachment_last_pass.resize(p_attachments.size()); + if (p_view_count > 1) { + const VulkanContext::MultiviewCapabilities capabilities = context->get_multiview_capabilities(); + + // This only works with multiview! + ERR_FAIL_COND_V_MSG(!capabilities.is_supported, VK_NULL_HANDLE, "Multiview not supported"); + + // Make sure we limit this to the number of views we support. + ERR_FAIL_COND_V_MSG(p_view_count > capabilities.max_view_count, VK_NULL_HANDLE, "Hardware does not support requested number of views for Multiview render pass"); + } + // These are only used if we use multiview but we need to define them in scope. const uint32_t view_mask = (1 << p_view_count) - 1; const uint32_t correlation_mask = (1 << p_view_count) - 1; @@ -3703,7 +3732,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; attachment_last_pass[attachment] = i; } - reference.aspectMask = 0; + reference.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; color_references.push_back(reference); } @@ -3725,7 +3754,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF reference.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; attachment_last_pass[attachment] = i; } - reference.aspectMask = 0; // TODO: We need to set this here, possibly VK_IMAGE_ASPECT_COLOR_BIT? + reference.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; input_references.push_back(reference); } @@ -3754,7 +3783,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL attachment_last_pass[attachment] = i; } - reference.aspectMask = 0; + reference.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; resolve_references.push_back(reference); } @@ -3769,7 +3798,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass."); depth_stencil_reference.attachment = attachment_remap[attachment]; depth_stencil_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - depth_stencil_reference.aspectMask = 0; + depth_stencil_reference.aspectMask = VK_IMAGE_ASPECT_NONE; attachment_last_pass[attachment] = i; if (is_multisample_first) { @@ -3795,9 +3824,9 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF vrs_reference.pNext = nullptr; vrs_reference.attachment = attachment_remap[attachment]; vrs_reference.layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; - vrs_reference.aspectMask = 0; + vrs_reference.aspectMask = VK_IMAGE_ASPECT_NONE; - Size2i texel_size = context->get_vrs_capabilities().max_texel_size; + Size2i texel_size = context->get_vrs_capabilities().texel_size; VkFragmentShadingRateAttachmentInfoKHR &vrs_attachment_info = vrs_attachment_info_array[i]; vrs_attachment_info.sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR; @@ -3936,16 +3965,9 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF Vector<uint32_t> view_masks; VkRenderPassMultiviewCreateInfo render_pass_multiview_create_info; - if (p_view_count > 1) { - // This may no longer be needed with the new settings already including this. - - const VulkanContext::MultiviewCapabilities capabilities = context->get_multiview_capabilities(); - - // For now this only works with multiview! - ERR_FAIL_COND_V_MSG(!capabilities.is_supported, VK_NULL_HANDLE, "Multiview not supported"); - - // Make sure we limit this to the number of views we support. - ERR_FAIL_COND_V_MSG(p_view_count > capabilities.max_view_count, VK_NULL_HANDLE, "Hardware does not support requested number of views for Multiview render pass"); + if ((p_view_count > 1) && !context->supports_renderpass2()) { + // This is only required when using vkCreateRenderPass, we add it if vkCreateRenderPass2KHR is not supported + // resulting this in being passed to our vkCreateRenderPass fallback. // Set view masks for each subpass. for (uint32_t i = 0; i < subpasses.size(); i++) { @@ -4356,7 +4378,7 @@ RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(cons return id; } -RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers) { +RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers, const Vector<uint64_t> &p_offsets) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID()); @@ -4370,6 +4392,13 @@ RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFo VertexArray vertex_array; + if (p_offsets.is_empty()) { + vertex_array.offsets.resize_zeroed(p_src_buffers.size()); + } else { + ERR_FAIL_COND_V(p_offsets.size() != p_src_buffers.size(), RID()); + vertex_array.offsets = p_offsets; + } + vertex_array.vertex_count = p_vertex_count; vertex_array.description = p_vertex_format; vertex_array.max_instances_allowed = 0xFFFFFFFF; // By default as many as you want. @@ -4401,7 +4430,6 @@ RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFo } vertex_array.buffers.push_back(buffer->buffer); - vertex_array.offsets.push_back(0); // Offset unused, but passing anyway. } RID id = vertex_array_owner.make_rid(vertex_array); @@ -5172,9 +5200,9 @@ Vector<uint8_t> RenderingDeviceVulkan::shader_compile_binary_from_spirv(const Ve uint32_t offset = 0; uint8_t *binptr = ret.ptrw(); binptr[0] = 'G'; - binptr[1] = 'V'; + binptr[1] = 'S'; binptr[2] = 'B'; - binptr[3] = 'D'; // Godot vulkan binary data. + binptr[3] = 'D'; // Godot Shader Binary Data. offset += 4; encode_uint32(SHADER_BINARY_VERSION, binptr + offset); offset += sizeof(uint32_t); @@ -5235,7 +5263,7 @@ RID RenderingDeviceVulkan::shader_create_from_bytecode(const Vector<uint8_t> &p_ uint32_t read_offset = 0; // Consistency check. ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 3 + sizeof(RenderingDeviceVulkanShaderBinaryData), RID()); - ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'V' || binptr[2] != 'B' || binptr[3] != 'D', RID()); + ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', RID()); uint32_t bin_version = decode_uint32(binptr + 4); ERR_FAIL_COND_V(bin_version != SHADER_BINARY_VERSION, RID()); @@ -6293,7 +6321,7 @@ void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_ us->invalidated_callback_userdata = p_userdata; } -Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, @@ -6303,7 +6331,7 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint VkPipelineStageFlags dst_stage_mask = 0; VkAccessFlags dst_access = 0; - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { // Protect subsequent updates. dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -6339,7 +6367,7 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint return err; } -Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier) { +Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER, @@ -6351,7 +6379,7 @@ Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint3 VkPipelineStageFlags dst_stage_mask = 0; VkAccessFlags dst_access = 0; - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { // Protect subsequent updates. dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -6390,7 +6418,7 @@ Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { VkPipelineShaderStageCreateFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; // Get the vulkan buffer and the potential stage/access possible. - Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL); + Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL_BARRIERS); if (!buffer) { ERR_FAIL_V_MSG(Vector<uint8_t>(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving."); } @@ -6561,7 +6589,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma ERR_FAIL_INDEX_V(p_rasterization_state.cull_mode, 3, RID()); rasterization_state_create_info.cullMode = cull_mode[p_rasterization_state.cull_mode]; rasterization_state_create_info.frontFace = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE); - rasterization_state_create_info.depthBiasEnable = p_rasterization_state.depth_bias_enable; + rasterization_state_create_info.depthBiasEnable = p_rasterization_state.depth_bias_enabled; rasterization_state_create_info.depthBiasConstantFactor = p_rasterization_state.depth_bias_constant_factor; rasterization_state_create_info.depthBiasClamp = p_rasterization_state.depth_bias_clamp; rasterization_state_create_info.depthBiasSlopeFactor = p_rasterization_state.depth_bias_slope_factor; @@ -7729,7 +7757,7 @@ void RenderingDeviceVulkan::draw_list_bind_index_array(DrawListID p_list, RID p_ dl->validation.index_array_size = index_array->indices; dl->validation.index_array_offset = index_array->offset; - vkCmdBindIndexBuffer(dl->command_buffer, index_array->buffer, index_array->offset, index_array->index_type); + vkCmdBindIndexBuffer(dl->command_buffer, index_array->buffer, 0, index_array->index_type); } void RenderingDeviceVulkan::draw_list_set_line_width(DrawListID p_list, float p_width) { @@ -8062,7 +8090,7 @@ void RenderingDeviceVulkan::_draw_list_free(Rect2i *r_last_viewport) { _THREAD_SAFE_UNLOCK_ } -void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { +void RenderingDeviceVulkan::draw_list_end(BitField<BarrierMask> p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive."); @@ -8084,15 +8112,15 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } @@ -8570,20 +8598,20 @@ void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { #endif } -void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { +void RenderingDeviceVulkan::compute_list_end(BitField<BarrierMask> p_post_barrier) { ERR_FAIL_COND(!compute_list); uint32_t barrier_flags = 0; uint32_t access_flags = 0; - if (p_post_barrier & BARRIER_MASK_COMPUTE) { + if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } - if (p_post_barrier & BARRIER_MASK_RASTER) { + if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; } - if (p_post_barrier & BARRIER_MASK_TRANSFER) { + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } @@ -8651,43 +8679,45 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { _THREAD_SAFE_UNLOCK_ } -void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { +void RenderingDeviceVulkan::barrier(BitField<BarrierMask> p_from, BitField<BarrierMask> p_to) { uint32_t src_barrier_flags = 0; uint32_t src_access_flags = 0; - if (p_from & BARRIER_MASK_COMPUTE) { - src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT; - } - if (p_from & BARRIER_MASK_RASTER) { - src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - if (p_from & BARRIER_MASK_TRANSFER) { - src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; - src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; - } if (p_from == 0) { src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } else { + if (p_from.has_flag(BARRIER_MASK_COMPUTE)) { + src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_from.has_flag(BARRIER_MASK_RASTER)) { + src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + if (p_from.has_flag(BARRIER_MASK_TRANSFER)) { + src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } } uint32_t dst_barrier_flags = 0; uint32_t dst_access_flags = 0; - if (p_to & BARRIER_MASK_COMPUTE) { - dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - } - if (p_to & BARRIER_MASK_RASTER) { - dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; - dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; - } - if (p_to & BARRIER_MASK_TRANSFER) { - dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; - dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; - } if (p_to == 0) { dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } else { + if (p_to.has_flag(BARRIER_MASK_COMPUTE)) { + dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_to.has_flag(BARRIER_MASK_RASTER)) { + dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_to.has_flag(BARRIER_MASK_TRANSFER)) { + dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } } _memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true); @@ -9164,7 +9194,7 @@ void RenderingDeviceVulkan::_free_pending_resources(int p_frame) { Texture *texture = &frames[p_frame].textures_to_dispose_of.front()->get(); if (texture->bound) { - WARN_PRINT("Deleted a texture while it was bound.."); + WARN_PRINT("Deleted a texture while it was bound."); } vkDestroyImageView(device, texture->view, nullptr); if (texture->owner.is_null()) { @@ -9365,12 +9395,10 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_de } } - // NOTE: If adding new project settings here, also duplicate their definition in - // rendering_server.cpp for headless doctool. - staging_buffer_block_size = GLOBAL_DEF("rendering/rendering_device/staging_buffer/block_size_kb", 256); + staging_buffer_block_size = GLOBAL_GET("rendering/rendering_device/staging_buffer/block_size_kb"); staging_buffer_block_size = MAX(4u, staging_buffer_block_size); staging_buffer_block_size *= 1024; // Kb -> bytes. - staging_buffer_max_size = GLOBAL_DEF("rendering/rendering_device/staging_buffer/max_size_mb", 128); + staging_buffer_max_size = GLOBAL_GET("rendering/rendering_device/staging_buffer/max_size_mb"); staging_buffer_max_size = MAX(1u, staging_buffer_max_size); staging_buffer_max_size *= 1024 * 1024; @@ -9378,7 +9406,7 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_de // Validate enough blocks. staging_buffer_max_size = staging_buffer_block_size * 4; } - texture_upload_region_size_px = GLOBAL_DEF("rendering/rendering_device/staging_buffer/texture_upload_region_size_px", 64); + texture_upload_region_size_px = GLOBAL_GET("rendering/rendering_device/staging_buffer/texture_upload_region_size_px"); texture_upload_region_size_px = nearest_power_of_2_templated(texture_upload_region_size_px); frames_drawn = frame_count; // Start from frame count, so everything else is immediately old. @@ -9393,7 +9421,7 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_de ERR_CONTINUE(err != OK); } - max_descriptors_per_pool = GLOBAL_DEF("rendering/rendering_device/descriptor_pools/max_descriptors_per_pool", 64); + max_descriptors_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool"); // Check to make sure DescriptorPoolKey is good. static_assert(sizeof(uint64_t) * 3 >= UNIFORM_TYPE_MAX * sizeof(uint16_t)); @@ -9688,6 +9716,10 @@ uint64_t RenderingDeviceVulkan::limit_get(Limit p_limit) const { return limits.maxComputeWorkGroupSize[1]; case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: return limits.maxComputeWorkGroupSize[2]; + case LIMIT_MAX_VIEWPORT_DIMENSIONS_X: + return limits.maxViewportDimensions[0]; + case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: + return limits.maxViewportDimensions[1]; case LIMIT_SUBGROUP_SIZE: { VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities(); return subgroup_capabilities.size; @@ -9700,6 +9732,12 @@ uint64_t RenderingDeviceVulkan::limit_get(Limit p_limit) const { VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities(); return subgroup_capabilities.supported_operations_flags_rd(); } + case LIMIT_VRS_TEXEL_WIDTH: { + return context->get_vrs_capabilities().texel_size.x; + } + case LIMIT_VRS_TEXEL_HEIGHT: { + return context->get_vrs_capabilities().texel_size.y; + } default: ERR_FAIL_V(0); } diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 7c75e9bb2e..c6e1830e90 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -162,7 +162,7 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t texture_upload_region_size_px = 0; Vector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d = false); - Error _texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier, bool p_use_setup_queue); + Error _texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier, bool p_use_setup_queue); /*****************/ /**** SAMPLER ****/ @@ -909,7 +909,7 @@ class RenderingDeviceVulkan : public RenderingDevice { Error _draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass, uint32_t *r_subpass_count); Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector<RID> &p_storage_textures); _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); - Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access, uint32_t p_post_barrier); + Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access, BitField<BarrierMask> p_post_barrier); Error _draw_list_allocate(const Rect2i &p_viewport, uint32_t p_splits, uint32_t p_subpass); void _draw_list_free(Rect2i *r_last_viewport = nullptr); @@ -1052,17 +1052,17 @@ public: virtual RID texture_create_from_extension(TextureType p_type, DataFormat p_format, TextureSamples p_samples, uint64_t p_flags, uint64_t p_image, uint64_t p_width, uint64_t p_height, uint64_t p_depth, uint64_t p_layers); virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, uint32_t p_mipmaps = 1, TextureSliceType p_slice_type = TEXTURE_SLICE_2D); - virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Vector<uint8_t> texture_get_data(RID p_texture, uint32_t p_layer); - virtual bool texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const; + virtual bool texture_is_format_supported_for_usage(DataFormat p_format, BitField<RenderingDevice::TextureUsageBits> p_usage) const; virtual bool texture_is_shared(RID p_texture); virtual bool texture_is_valid(RID p_texture); virtual Size2i texture_size(RID p_texture); - virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier = BARRIER_MASK_ALL); - virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier = BARRIER_MASK_ALL); - virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); /*********************/ /**** FRAMEBUFFER ****/ @@ -1095,7 +1095,7 @@ public: // Internally reference counted, this ID is warranted to be unique for the same description, but needs to be freed as many times as it was allocated. virtual VertexFormatID vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats); - virtual RID vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers); + virtual RID vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers, const Vector<uint64_t> &p_offsets = Vector<uint64_t>()); virtual RID index_buffer_create(uint32_t p_size_indices, IndexBufferFormat p_format, const Vector<uint8_t> &p_data = Vector<uint8_t>(), bool p_use_restart_indices = false); @@ -1124,8 +1124,8 @@ public: virtual bool uniform_set_is_valid(RID p_uniform_set); virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata); - virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); // Works for any buffer. - virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); // Works for any buffer. + virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Vector<uint8_t> buffer_get_data(RID p_buffer); /*************************/ @@ -1176,7 +1176,7 @@ public: virtual DrawListID draw_list_switch_to_next_pass(); virtual Error draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids); - virtual void draw_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual void draw_list_end(BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); /***********************/ /**** COMPUTE LISTS ****/ @@ -1191,9 +1191,9 @@ public: virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads); virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); - virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual void compute_list_end(BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); - virtual void barrier(uint32_t p_from = BARRIER_MASK_ALL, uint32_t p_to = BARRIER_MASK_ALL); + virtual void barrier(BitField<BarrierMask> p_from = BARRIER_MASK_ALL_BARRIERS, BitField<BarrierMask> p_to = BARRIER_MASK_ALL_BARRIERS); virtual void full_barrier(); /**************/ diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 1ab8914624..028c7dca6f 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -48,15 +48,119 @@ VulkanHooks *VulkanContext::vulkan_hooks = nullptr; -VkResult VulkanContext::vkCreateRenderPass2KHR(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass) { - if (fpCreateRenderPass2KHR == nullptr) { - fpCreateRenderPass2KHR = (PFN_vkCreateRenderPass2KHR)vkGetInstanceProcAddr(inst, "vkCreateRenderPass2KHR"); +Vector<VkAttachmentReference> VulkanContext::_convert_VkAttachmentReference2(uint32_t p_count, const VkAttachmentReference2 *p_refs) { + Vector<VkAttachmentReference> att_refs; + + if (p_refs != nullptr) { + for (uint32_t i = 0; i < p_count; i++) { + // We lose aspectMask in this conversion but we don't use it currently. + + VkAttachmentReference ref = { + p_refs[i].attachment, /* attachment */ + p_refs[i].layout /* layout */ + }; + + att_refs.push_back(ref); + } } - if (fpCreateRenderPass2KHR == nullptr) { - return VK_ERROR_EXTENSION_NOT_PRESENT; + return att_refs; +} + +VkResult VulkanContext::vkCreateRenderPass2KHR(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass) { + if (is_device_extension_enabled(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) { + if (fpCreateRenderPass2KHR == nullptr) { + fpCreateRenderPass2KHR = (PFN_vkCreateRenderPass2KHR)vkGetDeviceProcAddr(p_device, "vkCreateRenderPass2KHR"); + } + + if (fpCreateRenderPass2KHR == nullptr) { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } else { + return (fpCreateRenderPass2KHR)(p_device, p_create_info, p_allocator, p_render_pass); + } } else { - return (fpCreateRenderPass2KHR)(p_device, p_create_info, p_allocator, p_render_pass); + // need to fall back on vkCreateRenderPass + + const void *next = p_create_info->pNext; // ATM we only support multiview which should work if supported. + + Vector<VkAttachmentDescription> attachments; + for (uint32_t i = 0; i < p_create_info->attachmentCount; i++) { + // Basically the old layout just misses type and next. + VkAttachmentDescription att = { + p_create_info->pAttachments[i].flags, /* flags */ + p_create_info->pAttachments[i].format, /* format */ + p_create_info->pAttachments[i].samples, /* samples */ + p_create_info->pAttachments[i].loadOp, /* loadOp */ + p_create_info->pAttachments[i].storeOp, /* storeOp */ + p_create_info->pAttachments[i].stencilLoadOp, /* stencilLoadOp */ + p_create_info->pAttachments[i].stencilStoreOp, /* stencilStoreOp */ + p_create_info->pAttachments[i].initialLayout, /* initialLayout */ + p_create_info->pAttachments[i].finalLayout /* finalLayout */ + }; + + attachments.push_back(att); + } + + Vector<VkSubpassDescription> subpasses; + for (uint32_t i = 0; i < p_create_info->subpassCount; i++) { + // Here we need to do more, again it's just stripping out type and next + // but we have VkAttachmentReference2 to convert to VkAttachmentReference. + // Also viewmask is not supported but we don't use it outside of multiview. + + Vector<VkAttachmentReference> input_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].inputAttachmentCount, p_create_info->pSubpasses[i].pInputAttachments); + Vector<VkAttachmentReference> color_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].colorAttachmentCount, p_create_info->pSubpasses[i].pColorAttachments); + Vector<VkAttachmentReference> resolve_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].colorAttachmentCount, p_create_info->pSubpasses[i].pResolveAttachments); + Vector<VkAttachmentReference> depth_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].colorAttachmentCount, p_create_info->pSubpasses[i].pDepthStencilAttachment); + + VkSubpassDescription subpass = { + p_create_info->pSubpasses[i].flags, /* flags */ + p_create_info->pSubpasses[i].pipelineBindPoint, /* pipelineBindPoint */ + p_create_info->pSubpasses[i].inputAttachmentCount, /* inputAttachmentCount */ + input_attachments.size() == 0 ? nullptr : input_attachments.ptr(), /* pInputAttachments */ + p_create_info->pSubpasses[i].colorAttachmentCount, /* colorAttachmentCount */ + color_attachments.size() == 0 ? nullptr : color_attachments.ptr(), /* pColorAttachments */ + resolve_attachments.size() == 0 ? nullptr : resolve_attachments.ptr(), /* pResolveAttachments */ + depth_attachments.size() == 0 ? nullptr : depth_attachments.ptr(), /* pDepthStencilAttachment */ + p_create_info->pSubpasses[i].preserveAttachmentCount, /* preserveAttachmentCount */ + p_create_info->pSubpasses[i].pPreserveAttachments /* pPreserveAttachments */ + }; + + subpasses.push_back(subpass); + } + + Vector<VkSubpassDependency> dependencies; + for (uint32_t i = 0; i < p_create_info->dependencyCount; i++) { + // We lose viewOffset here but again I don't believe we use this anywhere. + VkSubpassDependency dep = { + p_create_info->pDependencies[i].srcSubpass, /* srcSubpass */ + p_create_info->pDependencies[i].dstSubpass, /* dstSubpass */ + p_create_info->pDependencies[i].srcStageMask, /* srcStageMask */ + p_create_info->pDependencies[i].dstStageMask, /* dstStageMask */ + p_create_info->pDependencies[i].srcAccessMask, /* srcAccessMask */ + p_create_info->pDependencies[i].dstAccessMask, /* dstAccessMask */ + p_create_info->pDependencies[i].dependencyFlags, /* dependencyFlags */ + }; + + dependencies.push_back(dep); + } + + // CorrelatedViewMask is not supported in vkCreateRenderPass but we + // currently only use this for multiview. + // We'll need to look into this. + + VkRenderPassCreateInfo create_info = { + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, /* sType */ + next, /* pNext*/ + p_create_info->flags, /* flags */ + (uint32_t)attachments.size(), /* attachmentCount */ + attachments.ptr(), /* pAttachments */ + (uint32_t)subpasses.size(), /* subpassCount */ + subpasses.ptr(), /* pSubpasses */ + (uint32_t)dependencies.size(), /* */ + dependencies.ptr(), /* */ + }; + + return vkCreateRenderPass(device, &create_info, p_allocator, p_render_pass); } } @@ -296,16 +400,28 @@ Error VulkanContext::_obtain_vulkan_version() { return OK; } -Error VulkanContext::_initialize_extensions() { - uint32_t instance_extension_count = 0; +bool VulkanContext::instance_extensions_initialized = false; +HashMap<CharString, bool> VulkanContext::requested_instance_extensions; + +void VulkanContext::register_requested_instance_extension(const CharString &extension_name, bool p_required) { + ERR_FAIL_COND_MSG(instance_extensions_initialized, "You can only registered extensions before the Vulkan instance is created"); + ERR_FAIL_COND(requested_instance_extensions.has(extension_name)); - enabled_extension_count = 0; - enabled_debug_utils = false; - enabled_debug_report = false; - // Look for instance extensions. - VkBool32 surfaceExtFound = 0; - VkBool32 platformSurfaceExtFound = 0; - memset(extension_names, 0, sizeof(extension_names)); + requested_instance_extensions[extension_name] = p_required; +} + +Error VulkanContext::_initialize_instance_extensions() { + enabled_instance_extension_names.clear(); + + // Make sure our core extensions are here + register_requested_instance_extension(VK_KHR_SURFACE_EXTENSION_NAME, true); + register_requested_instance_extension(_get_platform_surface_extension(), true); + + if (_use_validation_layers()) { + register_requested_instance_extension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, false); + } + + register_requested_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false); // Only enable debug utils in verbose mode or DEV_ENABLED. // End users would get spammed with messages of varying verbosity due to the @@ -316,54 +432,141 @@ Error VulkanContext::_initialize_extensions() { #else bool want_debug_utils = OS::get_singleton()->is_stdout_verbose(); #endif + if (want_debug_utils) { + register_requested_instance_extension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false); + } + // Load instance extensions that are available... + uint32_t instance_extension_count = 0; VkResult err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, nullptr); ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_INCOMPLETE, ERR_CANT_CREATE); + ERR_FAIL_COND_V_MSG(instance_extension_count == 0, ERR_CANT_CREATE, "No instance extensions found, is a driver installed?"); - if (instance_extension_count > 0) { - VkExtensionProperties *instance_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * instance_extension_count); - err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, instance_extensions); - if (err != VK_SUCCESS && err != VK_INCOMPLETE) { - free(instance_extensions); - ERR_FAIL_V(ERR_CANT_CREATE); + VkExtensionProperties *instance_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * instance_extension_count); + err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, instance_extensions); + if (err != VK_SUCCESS && err != VK_INCOMPLETE) { + free(instance_extensions); + ERR_FAIL_V(ERR_CANT_CREATE); + } +#ifdef DEV_ENABLED + for (uint32_t i = 0; i < instance_extension_count; i++) { + print_verbose(String("VULKAN: Found instance extension ") + String(instance_extensions[i].extensionName)); + } +#endif + + // Enable all extensions that are supported and requested + for (uint32_t i = 0; i < instance_extension_count; i++) { + CharString extension_name(instance_extensions[i].extensionName); + if (requested_instance_extensions.has(extension_name)) { + enabled_instance_extension_names.insert(extension_name); } - for (uint32_t i = 0; i < instance_extension_count; i++) { - if (!strcmp(VK_KHR_SURFACE_EXTENSION_NAME, instance_extensions[i].extensionName)) { - surfaceExtFound = 1; - extension_names[enabled_extension_count++] = VK_KHR_SURFACE_EXTENSION_NAME; - } + } - if (!strcmp(_get_platform_surface_extension(), instance_extensions[i].extensionName)) { - platformSurfaceExtFound = 1; - extension_names[enabled_extension_count++] = _get_platform_surface_extension(); - } - if (!strcmp(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, instance_extensions[i].extensionName)) { - if (_use_validation_layers()) { - extension_names[enabled_extension_count++] = VK_EXT_DEBUG_REPORT_EXTENSION_NAME; - enabled_debug_report = true; - } - } - if (!strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, instance_extensions[i].extensionName)) { - if (want_debug_utils) { - extension_names[enabled_extension_count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; - enabled_debug_utils = true; - } - } - if (!strcmp(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, instance_extensions[i].extensionName)) { - extension_names[enabled_extension_count++] = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { + // Now check our requested extensions + for (KeyValue<CharString, bool> &requested_extension : requested_instance_extensions) { + if (!enabled_instance_extension_names.has(requested_extension.key)) { + if (requested_extension.value) { free(instance_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); + ERR_FAIL_V_MSG(ERR_BUG, String("Required extension ") + String(requested_extension.key) + String(" not found, is a driver installed?")); + } else { + print_verbose(String("Optional extension ") + String(requested_extension.key) + String(" not found")); } } + } - free(instance_extensions); + free(instance_extensions); + + instance_extensions_initialized = true; + return OK; +} + +bool VulkanContext::device_extensions_initialized = false; +HashMap<CharString, bool> VulkanContext::requested_device_extensions; + +void VulkanContext::register_requested_device_extension(const CharString &extension_name, bool p_required) { + ERR_FAIL_COND_MSG(device_extensions_initialized, "You can only registered extensions before the Vulkan instance is created"); + ERR_FAIL_COND(requested_device_extensions.has(extension_name)); + + requested_device_extensions[extension_name] = p_required; +} + +Error VulkanContext::_initialize_device_extensions() { + // Look for device extensions. + enabled_device_extension_names.clear(); + + // Make sure our core extensions are here + register_requested_device_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); + + register_requested_device_extension(VK_KHR_MULTIVIEW_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + register_requested_device_extension(VK_KHR_16BIT_STORAGE_EXTENSION_NAME, false); + + // TODO consider the following extensions: + // - VK_KHR_spirv_1_4 + // - VK_KHR_swapchain_mutable_format + // - VK_EXT_full_screen_exclusive + // - VK_EXT_hdr_metadata + // - VK_KHR_depth_stencil_resolve + + // Even though the user "enabled" the extension via the command + // line, we must make sure that it's enumerated for use with the + // device. Therefore, disable it here, and re-enable it again if + // enumerated. + if (VK_KHR_incremental_present_enabled) { + register_requested_device_extension(VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, false); + } + if (VK_GOOGLE_display_timing_enabled) { + register_requested_device_extension(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME, false); } - ERR_FAIL_COND_V_MSG(!surfaceExtFound, ERR_CANT_CREATE, "No surface extension found, is a driver installed?"); - ERR_FAIL_COND_V_MSG(!platformSurfaceExtFound, ERR_CANT_CREATE, "No platform surface extension found, is a driver installed?"); + // obtain available device extensions + uint32_t device_extension_count = 0; + VkResult err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); + ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + ERR_FAIL_COND_V_MSG(device_extension_count == 0, ERR_CANT_CREATE, + "vkEnumerateDeviceExtensionProperties failed to find any extensions\n\n" + "Do you have a compatible Vulkan installable client driver (ICD) installed?\n" + "vkCreateInstance Failure"); + + VkExtensionProperties *device_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * device_extension_count); + err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, device_extensions); + if (err) { + free(device_extensions); + ERR_FAIL_V(ERR_CANT_CREATE); + } + +#ifdef DEV_ENABLED + for (uint32_t i = 0; i < device_extension_count; i++) { + print_verbose(String("VULKAN: Found device extension ") + String(device_extensions[i].extensionName)); + } +#endif + + // Enable all extensions that are supported and requested + for (uint32_t i = 0; i < device_extension_count; i++) { + CharString extension_name(device_extensions[i].extensionName); + if (requested_device_extensions.has(extension_name)) { + enabled_device_extension_names.insert(extension_name); + } + } + + // Now check our requested extensions + for (KeyValue<CharString, bool> &requested_extension : requested_device_extensions) { + if (!enabled_device_extension_names.has(requested_extension.key)) { + if (requested_extension.value) { + free(device_extensions); + ERR_FAIL_V_MSG(ERR_BUG, + String("vkEnumerateDeviceExtensionProperties failed to find the ") + String(requested_extension.key) + String(" extension.\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?\nvkCreateInstance Failure")); + } else { + print_verbose(String("Optional extension ") + String(requested_extension.key) + String(" not found")); + } + } + } + free(device_extensions); + + device_extensions_initialized = true; return OK; } @@ -521,6 +724,9 @@ Error VulkanContext::_check_capabilities() { vrs_capabilities.pipeline_vrs_supported = false; vrs_capabilities.primitive_vrs_supported = false; vrs_capabilities.attachment_vrs_supported = false; + vrs_capabilities.min_texel_size = Size2i(); + vrs_capabilities.max_texel_size = Size2i(); + vrs_capabilities.texel_size = Size2i(); multiview_capabilities.is_supported = false; multiview_capabilities.geometry_shader_is_supported = false; multiview_capabilities.tessellation_shader_is_supported = false; @@ -537,155 +743,176 @@ Error VulkanContext::_check_capabilities() { storage_buffer_capabilities.storage_push_constant_16_is_supported = false; storage_buffer_capabilities.storage_input_output_16 = false; - // Check for extended features. - PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2"); - if (vkGetPhysicalDeviceFeatures2_func == nullptr) { - // In Vulkan 1.0 might be accessible under its original extension name. - vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2KHR"); - } - if (vkGetPhysicalDeviceFeatures2_func != nullptr) { - // Check our extended features. - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, - /*pNext*/ nullptr, - /*pipelineFragmentShadingRate*/ false, - /*primitiveFragmentShadingRate*/ false, - /*attachmentFragmentShadingRate*/ false, - }; - - VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, - /*pNext*/ &vrs_features, - /*shaderFloat16*/ false, - /*shaderInt8*/ false, - }; + if (is_instance_extension_enabled(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME)) { + // Check for extended features. + PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2"); + if (vkGetPhysicalDeviceFeatures2_func == nullptr) { + // In Vulkan 1.0 might be accessible under its original extension name. + vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2KHR"); + } + if (vkGetPhysicalDeviceFeatures2_func != nullptr) { + // Check our extended features. + VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, + /*pNext*/ nullptr, + /*pipelineFragmentShadingRate*/ false, + /*primitiveFragmentShadingRate*/ false, + /*attachmentFragmentShadingRate*/ false, + }; - VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, - /*pNext*/ &shader_features, - /*storageBuffer16BitAccess*/ false, - /*uniformAndStorageBuffer16BitAccess*/ false, - /*storagePushConstant16*/ false, - /*storageInputOutput16*/ false, - }; + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, + /*pNext*/ &vrs_features, + /*shaderFloat16*/ false, + /*shaderInt8*/ false, + }; - VkPhysicalDeviceMultiviewFeatures multiview_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, - /*pNext*/ &storage_feature, - /*multiview*/ false, - /*multiviewGeometryShader*/ false, - /*multiviewTessellationShader*/ false, - }; + VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, + /*pNext*/ &shader_features, + /*storageBuffer16BitAccess*/ false, + /*uniformAndStorageBuffer16BitAccess*/ false, + /*storagePushConstant16*/ false, + /*storageInputOutput16*/ false, + }; - VkPhysicalDeviceFeatures2 device_features; - device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - device_features.pNext = &multiview_features; + VkPhysicalDeviceMultiviewFeatures multiview_features = { + /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, + /*pNext*/ &storage_feature, + /*multiview*/ false, + /*multiviewGeometryShader*/ false, + /*multiviewTessellationShader*/ false, + }; - vkGetPhysicalDeviceFeatures2_func(gpu, &device_features); + VkPhysicalDeviceFeatures2 device_features; + device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + device_features.pNext = &multiview_features; - vrs_capabilities.pipeline_vrs_supported = vrs_features.pipelineFragmentShadingRate; - vrs_capabilities.primitive_vrs_supported = vrs_features.primitiveFragmentShadingRate; - vrs_capabilities.attachment_vrs_supported = vrs_features.attachmentFragmentShadingRate; + vkGetPhysicalDeviceFeatures2_func(gpu, &device_features); - multiview_capabilities.is_supported = multiview_features.multiview; - multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader; - multiview_capabilities.tessellation_shader_is_supported = multiview_features.multiviewTessellationShader; + // We must check that the relative extension is present before assuming a + // feature as enabled. Actually, according to the spec we shouldn't add the + // structs in pNext at all, but this works fine. + // See also: https://github.com/godotengine/godot/issues/65409 + if (is_device_extension_enabled(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { + vrs_capabilities.pipeline_vrs_supported = vrs_features.pipelineFragmentShadingRate; + vrs_capabilities.primitive_vrs_supported = vrs_features.primitiveFragmentShadingRate; + vrs_capabilities.attachment_vrs_supported = vrs_features.attachmentFragmentShadingRate; + } - shader_capabilities.shader_float16_is_supported = shader_features.shaderFloat16; - shader_capabilities.shader_int8_is_supported = shader_features.shaderInt8; + if (is_device_extension_enabled(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { + multiview_capabilities.is_supported = multiview_features.multiview; + multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader; + multiview_capabilities.tessellation_shader_is_supported = multiview_features.multiviewTessellationShader; + } - storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = storage_feature.storageBuffer16BitAccess; - storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported = storage_feature.uniformAndStorageBuffer16BitAccess; - storage_buffer_capabilities.storage_push_constant_16_is_supported = storage_feature.storagePushConstant16; - storage_buffer_capabilities.storage_input_output_16 = storage_feature.storageInputOutput16; - } + if (is_device_extension_enabled(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { + shader_capabilities.shader_float16_is_supported = shader_features.shaderFloat16; + shader_capabilities.shader_int8_is_supported = shader_features.shaderInt8; + } - // Check extended properties. - PFN_vkGetPhysicalDeviceProperties2 device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2"); - if (device_properties_func == nullptr) { - // In Vulkan 1.0 might be accessible under its original extension name. - device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2KHR"); - } - if (device_properties_func != nullptr) { - VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrsProperties{}; - VkPhysicalDeviceMultiviewProperties multiviewProperties{}; - VkPhysicalDeviceSubgroupProperties subgroupProperties{}; - VkPhysicalDeviceProperties2 physicalDeviceProperties{}; - void *nextptr = nullptr; + if (is_device_extension_enabled(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { + storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = storage_feature.storageBuffer16BitAccess; + storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported = storage_feature.uniformAndStorageBuffer16BitAccess; + storage_buffer_capabilities.storage_push_constant_16_is_supported = storage_feature.storagePushConstant16; + storage_buffer_capabilities.storage_input_output_16 = storage_feature.storageInputOutput16; + } + } - subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; - subgroupProperties.pNext = nextptr; - nextptr = &subgroupProperties; + // Check extended properties. + PFN_vkGetPhysicalDeviceProperties2 device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2"); + if (device_properties_func == nullptr) { + // In Vulkan 1.0 might be accessible under its original extension name. + device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2KHR"); + } + if (device_properties_func != nullptr) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrsProperties{}; + VkPhysicalDeviceMultiviewProperties multiviewProperties{}; + VkPhysicalDeviceSubgroupProperties subgroupProperties{}; + VkPhysicalDeviceProperties2 physicalDeviceProperties{}; + void *nextptr = nullptr; + + if (!(vulkan_major == 1 && vulkan_minor == 0)) { + subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + subgroupProperties.pNext = nextptr; + + nextptr = &subgroupProperties; + } - if (multiview_capabilities.is_supported) { - multiviewProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES; - multiviewProperties.pNext = nextptr; + if (multiview_capabilities.is_supported) { + multiviewProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES; + multiviewProperties.pNext = nextptr; - nextptr = &multiviewProperties; - } + nextptr = &multiviewProperties; + } - if (vrs_capabilities.attachment_vrs_supported) { - vrsProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; - vrsProperties.pNext = nextptr; + if (vrs_capabilities.attachment_vrs_supported) { + vrsProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + vrsProperties.pNext = nextptr; - nextptr = &vrsProperties; - } + nextptr = &vrsProperties; + } - physicalDeviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - physicalDeviceProperties.pNext = nextptr; + physicalDeviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physicalDeviceProperties.pNext = nextptr; - device_properties_func(gpu, &physicalDeviceProperties); + device_properties_func(gpu, &physicalDeviceProperties); - subgroup_capabilities.size = subgroupProperties.subgroupSize; - subgroup_capabilities.supportedStages = subgroupProperties.supportedStages; - subgroup_capabilities.supportedOperations = subgroupProperties.supportedOperations; - // Note: quadOperationsInAllStages will be true if: - // - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT. - // - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT. - subgroup_capabilities.quadOperationsInAllStages = subgroupProperties.quadOperationsInAllStages; + subgroup_capabilities.size = subgroupProperties.subgroupSize; + subgroup_capabilities.supportedStages = subgroupProperties.supportedStages; + subgroup_capabilities.supportedOperations = subgroupProperties.supportedOperations; + // Note: quadOperationsInAllStages will be true if: + // - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT. + // - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT. + subgroup_capabilities.quadOperationsInAllStages = subgroupProperties.quadOperationsInAllStages; - if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { - print_verbose("- Vulkan Variable Rate Shading supported:"); - if (vrs_capabilities.pipeline_vrs_supported) { - print_verbose(" Pipeline fragment shading rate"); - } - if (vrs_capabilities.primitive_vrs_supported) { - print_verbose(" Primitive fragment shading rate"); - } - if (vrs_capabilities.attachment_vrs_supported) { - // TODO expose these somehow to the end user. - vrs_capabilities.min_texel_size.x = vrsProperties.minFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.min_texel_size.y = vrsProperties.minFragmentShadingRateAttachmentTexelSize.height; - vrs_capabilities.max_texel_size.x = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.max_texel_size.y = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.height; + if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { + print_verbose("- Vulkan Variable Rate Shading supported:"); + if (vrs_capabilities.pipeline_vrs_supported) { + print_verbose(" Pipeline fragment shading rate"); + } + if (vrs_capabilities.primitive_vrs_supported) { + print_verbose(" Primitive fragment shading rate"); + } + if (vrs_capabilities.attachment_vrs_supported) { + // TODO expose these somehow to the end user. + vrs_capabilities.min_texel_size.x = vrsProperties.minFragmentShadingRateAttachmentTexelSize.width; + vrs_capabilities.min_texel_size.y = vrsProperties.minFragmentShadingRateAttachmentTexelSize.height; + vrs_capabilities.max_texel_size.x = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.width; + vrs_capabilities.max_texel_size.y = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.height; + + // We'll attempt to default to a texel size of 16x16 + vrs_capabilities.texel_size.x = CLAMP(16, vrs_capabilities.min_texel_size.x, vrs_capabilities.max_texel_size.x); + vrs_capabilities.texel_size.y = CLAMP(16, vrs_capabilities.min_texel_size.y, vrs_capabilities.max_texel_size.y); + + print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")")); + } - print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")")); + } else { + print_verbose("- Vulkan Variable Rate Shading not supported"); } - } else { - print_verbose("- Vulkan Variable Rate Shading not supported"); - } + if (multiview_capabilities.is_supported) { + multiview_capabilities.max_view_count = multiviewProperties.maxMultiviewViewCount; + multiview_capabilities.max_instance_count = multiviewProperties.maxMultiviewInstanceIndex; - if (multiview_capabilities.is_supported) { - multiview_capabilities.max_view_count = multiviewProperties.maxMultiviewViewCount; - multiview_capabilities.max_instance_count = multiviewProperties.maxMultiviewInstanceIndex; + print_verbose("- Vulkan multiview supported:"); + print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); + print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); + } else { + print_verbose("- Vulkan multiview not supported"); + } - print_verbose("- Vulkan multiview supported:"); - print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); - print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); + print_verbose("- Vulkan subgroup:"); + print_verbose(" size: " + itos(subgroup_capabilities.size)); + print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc()); + print_verbose(" supported ops: " + subgroup_capabilities.supported_operations_desc()); + if (subgroup_capabilities.quadOperationsInAllStages) { + print_verbose(" quad operations in all stages"); + } } else { - print_verbose("- Vulkan multiview not supported"); + print_verbose("- Couldn't call vkGetPhysicalDeviceProperties2"); } - - print_verbose("- Vulkan subgroup:"); - print_verbose(" size: " + itos(subgroup_capabilities.size)); - print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc()); - print_verbose(" supported ops: " + subgroup_capabilities.supported_operations_desc()); - if (subgroup_capabilities.quadOperationsInAllStages) { - print_verbose(" quad operations in all stages"); - } - } else { - print_verbose("- Couldn't call vkGetPhysicalDeviceProperties2"); } return OK; @@ -697,13 +924,20 @@ Error VulkanContext::_create_instance() { // Initialize extensions. { - Error err = _initialize_extensions(); + Error err = _initialize_instance_extensions(); if (err != OK) { return err; } } - CharString cs = ProjectSettings::get_singleton()->get("application/config/name").operator String().utf8(); + int enabled_extension_count = 0; + const char *enabled_extension_names[MAX_EXTENSIONS]; + ERR_FAIL_COND_V(enabled_instance_extension_names.size() > MAX_EXTENSIONS, ERR_CANT_CREATE); + for (const CharString &extension_name : enabled_instance_extension_names) { + enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); + } + + CharString cs = GLOBAL_GET("application/config/name").operator String().utf8(); const VkApplicationInfo app = { /*sType*/ VK_STRUCTURE_TYPE_APPLICATION_INFO, /*pNext*/ nullptr, @@ -717,7 +951,7 @@ Error VulkanContext::_create_instance() { inst_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; inst_info.pApplicationInfo = &app; inst_info.enabledExtensionCount = enabled_extension_count; - inst_info.ppEnabledExtensionNames = (const char *const *)extension_names; + inst_info.ppEnabledExtensionNames = (const char *const *)enabled_extension_names; if (_use_validation_layers()) { _get_preferred_validation_layers(&inst_info.enabledLayerCount, &inst_info.ppEnabledLayerNames); } @@ -727,9 +961,9 @@ Error VulkanContext::_create_instance() { * After the instance is created, we use the instance-based * function to register the final callback. */ - VkDebugUtilsMessengerCreateInfoEXT dbg_messenger_create_info; - VkDebugReportCallbackCreateInfoEXT dbg_report_callback_create_info{}; - if (enabled_debug_utils) { + VkDebugUtilsMessengerCreateInfoEXT dbg_messenger_create_info = {}; + VkDebugReportCallbackCreateInfoEXT dbg_report_callback_create_info = {}; + if (is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { // VK_EXT_debug_utils style. dbg_messenger_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; dbg_messenger_create_info.pNext = nullptr; @@ -742,7 +976,7 @@ Error VulkanContext::_create_instance() { dbg_messenger_create_info.pfnUserCallback = _debug_messenger_callback; dbg_messenger_create_info.pUserData = this; inst_info.pNext = &dbg_messenger_create_info; - } else if (enabled_debug_report) { + } else if (is_instance_extension_enabled(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { dbg_report_callback_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; dbg_report_callback_create_info.flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | @@ -782,7 +1016,7 @@ Error VulkanContext::_create_instance() { volkLoadInstance(inst); #endif - if (enabled_debug_utils) { + if (is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { // Setup VK_EXT_debug_utils function pointers always (we use them for debug labels and names). CreateDebugUtilsMessengerEXT = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(inst, "vkCreateDebugUtilsMessengerEXT"); @@ -823,7 +1057,7 @@ Error VulkanContext::_create_instance() { ERR_FAIL_V(ERR_CANT_CREATE); break; } - } else if (enabled_debug_report) { + } else if (is_instance_extension_enabled(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { CreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkCreateDebugReportCallbackEXT"); DebugReportMessageEXT = (PFN_vkDebugReportMessageEXT)vkGetInstanceProcAddr(inst, "vkDebugReportMessageEXT"); DestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkDestroyDebugReportCallbackEXT"); @@ -1004,12 +1238,6 @@ Error VulkanContext::_create_physical_device(VkSurfaceKHR p_surface) { free(physical_devices); - // Look for device extensions. - uint32_t device_extension_count = 0; - VkBool32 swapchainExtFound = 0; - enabled_extension_count = 0; - memset(extension_names, 0, sizeof(extension_names)); - // Get identifier properties. vkGetPhysicalDeviceProperties(gpu, &gpu_props); @@ -1035,83 +1263,13 @@ Error VulkanContext::_create_physical_device(VkSurfaceKHR p_surface) { device_api_version = gpu_props.apiVersion; - err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - if (device_extension_count > 0) { - VkExtensionProperties *device_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * device_extension_count); - err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, device_extensions); - if (err) { - free(device_extensions); - ERR_FAIL_V(ERR_CANT_CREATE); - } - - for (uint32_t i = 0; i < device_extension_count; i++) { - if (!strcmp(VK_KHR_SWAPCHAIN_EXTENSION_NAME, device_extensions[i].extensionName)) { - swapchainExtFound = 1; - extension_names[enabled_extension_count++] = VK_KHR_SWAPCHAIN_EXTENSION_NAME; - } - if (!strcmp(VK_KHR_MULTIVIEW_EXTENSION_NAME, device_extensions[i].extensionName)) { - // If multiview is supported, enable it. - extension_names[enabled_extension_count++] = VK_KHR_MULTIVIEW_EXTENSION_NAME; - } - if (!strcmp(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, device_extensions[i].extensionName)) { - // if shading rate image is supported, enable it - extension_names[enabled_extension_count++] = VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME; - } - if (!strcmp(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, device_extensions[i].extensionName)) { - extension_names[enabled_extension_count++] = VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { - free(device_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); - } - } - - if (VK_KHR_incremental_present_enabled) { - // Even though the user "enabled" the extension via the command - // line, we must make sure that it's enumerated for use with the - // device. Therefore, disable it here, and re-enable it again if - // enumerated. - VK_KHR_incremental_present_enabled = false; - for (uint32_t i = 0; i < device_extension_count; i++) { - if (!strcmp(VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, device_extensions[i].extensionName)) { - extension_names[enabled_extension_count++] = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME; - VK_KHR_incremental_present_enabled = true; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { - free(device_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); - } - } - } - - if (VK_GOOGLE_display_timing_enabled) { - // Even though the user "enabled" the extension via the command - // line, we must make sure that it's enumerated for use with the - // device. Therefore, disable it here, and re-enable it again if - // enumerated. - VK_GOOGLE_display_timing_enabled = false; - for (uint32_t i = 0; i < device_extension_count; i++) { - if (!strcmp(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME, device_extensions[i].extensionName)) { - extension_names[enabled_extension_count++] = VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME; - VK_GOOGLE_display_timing_enabled = true; - } - if (enabled_extension_count >= MAX_EXTENSIONS) { - free(device_extensions); - ERR_FAIL_V_MSG(ERR_BUG, "Enabled extension count reaches MAX_EXTENSIONS, BUG"); - } - } + { + Error _err = _initialize_device_extensions(); + if (_err != OK) { + return _err; } - - free(device_extensions); } - ERR_FAIL_COND_V_MSG(!swapchainExtFound, ERR_CANT_CREATE, - "vkEnumerateDeviceExtensionProperties failed to find the " VK_KHR_SWAPCHAIN_EXTENSION_NAME - " extension.\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?\n" - "vkCreateInstance Failure"); - // Call with nullptr data to get count. vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr); ERR_FAIL_COND_V(queue_family_count == 0, ERR_CANT_CREATE); @@ -1172,7 +1330,7 @@ Error VulkanContext::_create_device() { }; nextptr = &shader_features; - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features; + VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { // Insert into our chain to enable these features if they are available. vrs_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; @@ -1184,9 +1342,9 @@ Error VulkanContext::_create_device() { nextptr = &vrs_features; } - VkPhysicalDeviceVulkan11Features vulkan11features; - VkPhysicalDevice16BitStorageFeaturesKHR storage_feature; - VkPhysicalDeviceMultiviewFeatures multiview_features; + VkPhysicalDeviceVulkan11Features vulkan11features = {}; + VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; + VkPhysicalDeviceMultiviewFeatures multiview_features = {}; if (vulkan_major > 1 || vulkan_minor >= 2) { // In Vulkan 1.2 and newer we use a newer struct to enable various features. @@ -1206,7 +1364,7 @@ Error VulkanContext::_create_device() { vulkan11features.shaderDrawParameters = 0; nextptr = &vulkan11features; } else { - // On Vulkan 1.0 and 1.1 we use our older structs to initialise these features. + // On Vulkan 1.0 and 1.1 we use our older structs to initialize these features. storage_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; storage_feature.pNext = nextptr; storage_feature.storageBuffer16BitAccess = storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; @@ -1225,6 +1383,13 @@ Error VulkanContext::_create_device() { } } + uint32_t enabled_extension_count = 0; + const char *enabled_extension_names[MAX_EXTENSIONS]; + ERR_FAIL_COND_V(enabled_device_extension_names.size() > MAX_EXTENSIONS, ERR_CANT_CREATE); + for (const CharString &extension_name : enabled_device_extension_names) { + enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); + } + VkDeviceCreateInfo sdevice = { /*sType*/ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, /*pNext*/ nextptr, @@ -1234,7 +1399,7 @@ Error VulkanContext::_create_device() { /*enabledLayerCount*/ 0, /*ppEnabledLayerNames*/ nullptr, /*enabledExtensionCount*/ enabled_extension_count, - /*ppEnabledExtensionNames*/ (const char *const *)extension_names, + /*ppEnabledExtensionNames*/ (const char *const *)enabled_extension_names, /*pEnabledFeatures*/ &physical_device_features, // If specific features are required, pass them in here. }; if (separate_present_queue) { @@ -1322,7 +1487,7 @@ Error VulkanContext::_initialize_queues(VkSurfaceKHR p_surface) { GET_DEVICE_PROC_ADDR(device, GetSwapchainImagesKHR); GET_DEVICE_PROC_ADDR(device, AcquireNextImageKHR); GET_DEVICE_PROC_ADDR(device, QueuePresentKHR); - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { GET_DEVICE_PROC_ADDR(device, GetRefreshCycleDurationGOOGLE); GET_DEVICE_PROC_ADDR(device, GetPastPresentationTimingGOOGLE); } @@ -1353,7 +1518,7 @@ Error VulkanContext::_initialize_queues(VkSurfaceKHR p_surface) { color_space = surfFormats[0].colorSpace; } else { // These should be ordered with the ones we want to use on top and fallback modes further down - // we want a 32bit RGBA unsigned normalised buffer or similar. + // we want a 32bit RGBA unsigned normalized buffer or similar. const VkFormat allowed_formats[] = { VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM @@ -1702,18 +1867,22 @@ Error VulkanContext::_update_swap_chain(Window *window) { preTransform = surfCapabilities.currentTransform; } - // Find a supported composite alpha mode - one of these is guaranteed to be set. VkCompositeAlphaFlagBitsKHR compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - VkCompositeAlphaFlagBitsKHR compositeAlphaFlags[4] = { - VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, - VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, - VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, - }; - for (uint32_t i = 0; i < ARRAY_SIZE(compositeAlphaFlags); i++) { - if (surfCapabilities.supportedCompositeAlpha & compositeAlphaFlags[i]) { - compositeAlpha = compositeAlphaFlags[i]; - break; + + if (OS::get_singleton()->is_layered_allowed() || !(surfCapabilities.supportedCompositeAlpha & compositeAlpha)) { + // Find a supported composite alpha mode - one of these is guaranteed to be set. + VkCompositeAlphaFlagBitsKHR compositeAlphaFlags[4] = { + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(compositeAlphaFlags); i++) { + if (surfCapabilities.supportedCompositeAlpha & compositeAlphaFlags[i]) { + compositeAlpha = compositeAlphaFlags[i]; + break; + } } } @@ -2073,7 +2242,7 @@ Error VulkanContext::swap_buffers() { VkResult err; #if 0 - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { // Look at what happened to previous presents, and make appropriate // adjustments in timing. DemoUpdateTargetIPD(demo); @@ -2194,7 +2363,7 @@ Error VulkanContext::swap_buffers() { } #if 0 - if (VK_KHR_incremental_present_enabled) { + if (is_device_extension_enabled(VK_KHR_incremental_present_enabled)) { // If using VK_KHR_incremental_present, we provide a hint of the region // that contains changed content relative to the previously-presented // image. The implementation can use this hint in order to save @@ -2225,7 +2394,7 @@ Error VulkanContext::swap_buffers() { #endif #if 0 - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { VkPresentTimeGOOGLE ptime; if (prev_desired_present_time == 0) { // This must be the first present for this swapchain. @@ -2255,7 +2424,7 @@ Error VulkanContext::swap_buffers() { /*swapchainCount*/ present.swapchainCount, /*pTimes*/ &ptime, }; - if (VK_GOOGLE_display_timing_enabled) { + if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { present.pNext = &present_time; } } @@ -2328,6 +2497,13 @@ RID VulkanContext::local_device_create() { queues[0].pQueuePriorities = queue_priorities; queues[0].flags = 0; + uint32_t enabled_extension_count = 0; + const char *enabled_extension_names[MAX_EXTENSIONS]; + ERR_FAIL_COND_V(enabled_device_extension_names.size() > MAX_EXTENSIONS, RID()); + for (const CharString &extension_name : enabled_device_extension_names) { + enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); + } + VkDeviceCreateInfo sdevice = { /*sType =*/VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, /*pNext */ nullptr, @@ -2337,7 +2513,7 @@ RID VulkanContext::local_device_create() { /*enabledLayerCount */ 0, /*ppEnabledLayerNames */ nullptr, /*enabledExtensionCount */ enabled_extension_count, - /*ppEnabledExtensionNames */ (const char *const *)extension_names, + /*ppEnabledExtensionNames */ (const char *const *)enabled_extension_names, /*pEnabledFeatures */ &physical_device_features, // If specific features are required, pass them in here. }; err = vkCreateDevice(gpu, &sdevice, nullptr, &ld.device); @@ -2402,7 +2578,7 @@ void VulkanContext::local_device_free(RID p_local_device) { } void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } @@ -2419,7 +2595,7 @@ void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String } void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } CharString cs = p_label_name.utf8(); @@ -2435,14 +2611,14 @@ void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, Strin } void VulkanContext::command_end_label(VkCommandBuffer p_command_buffer) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } CmdEndDebugUtilsLabelEXT(p_command_buffer); } void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) { - if (!enabled_debug_utils) { + if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; } CharString obj_data = p_object_name.utf8(); @@ -2503,7 +2679,7 @@ VulkanContext::~VulkanContext() { vkDestroySemaphore(device, image_ownership_semaphores[i], nullptr); } } - if (inst_initialized && enabled_debug_utils) { + if (inst_initialized && is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { DestroyDebugUtilsMessengerEXT(inst, dbg_messenger, nullptr); } if (inst_initialized && dbg_debug_report != VK_NULL_HANDLE) { diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index 7389e86ad7..0d49f5fe9f 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -34,6 +34,7 @@ #include "core/error/error_list.h" #include "core/os/mutex.h" #include "core/string/ustring.h" +#include "core/templates/hash_map.h" #include "core/templates/rb_map.h" #include "core/templates/rid_owner.h" #include "servers/display_server.h" @@ -76,6 +77,8 @@ public: Size2i min_texel_size; Size2i max_texel_size; + + Size2i texel_size; // The texel size we'll use }; struct ShaderCapabilities { @@ -182,18 +185,15 @@ private: int command_buffer_count = 1; // Extensions. + static bool instance_extensions_initialized; + static HashMap<CharString, bool> requested_instance_extensions; + HashSet<CharString> enabled_instance_extension_names; + static bool device_extensions_initialized; + static HashMap<CharString, bool> requested_device_extensions; + HashSet<CharString> enabled_device_extension_names; bool VK_KHR_incremental_present_enabled = true; bool VK_GOOGLE_display_timing_enabled = true; - uint32_t enabled_extension_count = 0; - const char *extension_names[MAX_EXTENSIONS]; - bool enabled_debug_utils = false; - - /** - * True if VK_EXT_debug_report extension is used. VK_EXT_debug_report is deprecated but it is - * still used if VK_EXT_debug_utils is not available. - */ - bool enabled_debug_report = false; PFN_vkCreateDebugUtilsMessengerEXT CreateDebugUtilsMessengerEXT = nullptr; PFN_vkDestroyDebugUtilsMessengerEXT DestroyDebugUtilsMessengerEXT = nullptr; @@ -222,7 +222,8 @@ private: VkDebugReportCallbackEXT dbg_debug_report = VK_NULL_HANDLE; Error _obtain_vulkan_version(); - Error _initialize_extensions(); + Error _initialize_instance_extensions(); + Error _initialize_device_extensions(); Error _check_capabilities(); VkBool32 _check_layers(uint32_t check_count, const char *const *check_names, uint32_t layer_count, VkLayerProperties *layers); @@ -257,6 +258,8 @@ private: Error _create_swap_chain(); Error _create_semaphores(); + Vector<VkAttachmentReference> _convert_VkAttachmentReference2(uint32_t p_count, const VkAttachmentReference2 *p_refs); + protected: virtual const char *_get_platform_surface_extension() const = 0; @@ -270,6 +273,7 @@ protected: public: // Extension calls. + bool supports_renderpass2() const { return is_device_extension_enabled(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME); } VkResult vkCreateRenderPass2KHR(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass); uint32_t get_vulkan_major() const { return vulkan_major; }; @@ -289,6 +293,16 @@ public: static void set_vulkan_hooks(VulkanHooks *p_vulkan_hooks) { vulkan_hooks = p_vulkan_hooks; }; + static void register_requested_instance_extension(const CharString &extension_name, bool p_required); + bool is_instance_extension_enabled(const CharString &extension_name) const { + return enabled_instance_extension_names.has(extension_name); + } + + static void register_requested_device_extension(const CharString &extension_name, bool p_required); + bool is_device_extension_enabled(const CharString &extension_name) const { + return enabled_device_extension_names.has(extension_name); + } + void window_resize(DisplayServer::WindowID p_window_id, int p_width, int p_height); int window_get_width(DisplayServer::WindowID p_window = 0); int window_get_height(DisplayServer::WindowID p_window = 0); diff --git a/drivers/wasapi/audio_driver_wasapi.cpp b/drivers/wasapi/audio_driver_wasapi.cpp index fb90b776cf..f196530e51 100644 --- a/drivers/wasapi/audio_driver_wasapi.cpp +++ b/drivers/wasapi/audio_driver_wasapi.cpp @@ -201,7 +201,7 @@ public: static CMMNotificationClient notif_client; -Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_capture, bool reinit) { +Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_capture, bool p_reinit, bool p_no_audio_client_3) { WAVEFORMATEX *pwfex; IMMDeviceEnumerator *enumerator = nullptr; IMMDevice *device = nullptr; @@ -267,7 +267,7 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c } } - if (reinit) { + if (p_reinit) { // In case we're trying to re-initialize the device prevent throwing this error on the console, // otherwise if there is currently no device available this will spam the console. if (hr != S_OK) { @@ -285,6 +285,11 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c } using_audio_client_3 = !p_capture; // IID_IAudioClient3 is only used for adjustable output latency (not input) + + if (p_no_audio_client_3) { + using_audio_client_3 = false; + } + if (using_audio_client_3) { hr = device->Activate(IID_IAudioClient3, CLSCTX_ALL, nullptr, (void **)&p_device->audio_client); if (hr != S_OK) { @@ -302,7 +307,7 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c SAFE_RELEASE(device) - if (reinit) { + if (p_reinit) { if (hr != S_OK) { return ERR_CANT_OPEN; } @@ -413,7 +418,12 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c &fundamental_period_frames, &min_period_frames, &max_period_frames); - ERR_FAIL_COND_V_MSG(hr != S_OK, ERR_CANT_OPEN, "WASAPI: GetSharedModeEnginePeriod failed with error 0x" + String::num_uint64(hr, 16) + "."); + if (hr != S_OK) { + print_verbose("WASAPI: GetSharedModeEnginePeriod failed with error 0x" + String::num_uint64(hr, 16) + ", falling back to IAudioClient."); + CoTaskMemFree(pwfex); + SAFE_RELEASE(device) + return audio_device_init(p_device, p_capture, p_reinit, true); + } // Period frames must be an integral multiple of fundamental_period_frames or IAudioClient3 initialization will fail, // so we need to select the closest multiple to the user-specified latency. @@ -430,12 +440,25 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c buffer_frames = period_frames; hr = device_audio_client_3->InitializeSharedAudioStream(0, period_frames, pwfex, nullptr); - ERR_FAIL_COND_V_MSG(hr != S_OK, ERR_CANT_OPEN, "WASAPI: InitializeSharedAudioStream failed with error 0x" + String::num_uint64(hr, 16) + "."); - uint32_t output_latency_in_frames; - WAVEFORMATEX *current_pwfex; - device_audio_client_3->GetCurrentSharedModeEnginePeriod(¤t_pwfex, &output_latency_in_frames); - real_latency = (float)output_latency_in_frames / (float)current_pwfex->nSamplesPerSec; - CoTaskMemFree(current_pwfex); + if (hr != S_OK) { + print_verbose("WASAPI: InitializeSharedAudioStream failed with error 0x" + String::num_uint64(hr, 16) + ", falling back to IAudioClient."); + CoTaskMemFree(pwfex); + SAFE_RELEASE(device); + return audio_device_init(p_device, p_capture, p_reinit, true); + } else { + uint32_t output_latency_in_frames; + WAVEFORMATEX *current_pwfex; + hr = device_audio_client_3->GetCurrentSharedModeEnginePeriod(¤t_pwfex, &output_latency_in_frames); + if (hr == OK) { + real_latency = (float)output_latency_in_frames / (float)current_pwfex->nSamplesPerSec; + CoTaskMemFree(current_pwfex); + } else { + print_verbose("WASAPI: GetCurrentSharedModeEnginePeriod failed with error 0x" + String::num_uint64(hr, 16) + ", falling back to IAudioClient."); + CoTaskMemFree(pwfex); + SAFE_RELEASE(device); + return audio_device_init(p_device, p_capture, p_reinit, true); + } + } } if (p_capture) { @@ -452,8 +475,8 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c return OK; } -Error AudioDriverWASAPI::init_render_device(bool reinit) { - Error err = audio_device_init(&audio_output, false, reinit); +Error AudioDriverWASAPI::init_render_device(bool p_reinit) { + Error err = audio_device_init(&audio_output, false, p_reinit); if (err != OK) { return err; } @@ -484,8 +507,8 @@ Error AudioDriverWASAPI::init_render_device(bool reinit) { return OK; } -Error AudioDriverWASAPI::init_capture_device(bool reinit) { - Error err = audio_device_init(&audio_input, true, reinit); +Error AudioDriverWASAPI::init_capture_device(bool p_reinit) { + Error err = audio_device_init(&audio_input, true, p_reinit); if (err != OK) { return err; } diff --git a/drivers/wasapi/audio_driver_wasapi.h b/drivers/wasapi/audio_driver_wasapi.h index c30a54c042..fb6a55734d 100644 --- a/drivers/wasapi/audio_driver_wasapi.h +++ b/drivers/wasapi/audio_driver_wasapi.h @@ -83,13 +83,13 @@ class AudioDriverWASAPI : public AudioDriver { static _FORCE_INLINE_ int32_t read_sample(WORD format_tag, int bits_per_sample, BYTE *buffer, int i); static void thread_func(void *p_udata); - Error init_render_device(bool reinit = false); - Error init_capture_device(bool reinit = false); + Error init_render_device(bool p_reinit = false); + Error init_capture_device(bool p_reinit = false); Error finish_render_device(); Error finish_capture_device(); - Error audio_device_init(AudioDeviceWASAPI *p_device, bool p_capture, bool reinit); + Error audio_device_init(AudioDeviceWASAPI *p_device, bool p_capture, bool p_reinit, bool p_no_audio_client_3 = false); Error audio_device_finish(AudioDeviceWASAPI *p_device); PackedStringArray audio_device_get_list(bool p_capture); |