diff options
author | clayjohn <claynjohn@gmail.com> | 2022-09-20 14:21:31 -0700 |
---|---|---|
committer | clayjohn <claynjohn@gmail.com> | 2022-09-20 23:40:01 -0700 |
commit | 27a3014f5052ae40f89684a5559c17fbebe7aa8d (patch) | |
tree | 7bf20c7a8150f15e1732c020d05ba0654d3402fd /servers/rendering/renderer_rd | |
parent | 6f5704d86f95171ba8b6b2ac9f56e284c4d35d7a (diff) |
Emulate double precision for regular rendering operation.
We calculate the lost precision on the CPU and pass it into the GPU
so that it can calculate an error-corrected version of the vertex position
Diffstat (limited to 'servers/rendering/renderer_rd')
8 files changed, 205 insertions, 12 deletions
diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 2ad73960a0..6f0da7879e 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -711,6 +711,14 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i instance_data.lightmap_uv_scale[2] = inst->lightmap_uv_scale.size.x; instance_data.lightmap_uv_scale[3] = inst->lightmap_uv_scale.size.y; +#ifdef REAL_T_IS_DOUBLE + // Split the origin into two components, the float approximation and the missing precision + // In the shader we will combine these back together to restore the lost precision. + RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.transform[3]); + RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.transform[7]); + RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.transform[11]); +#endif + bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid(); if (prev_surface != nullptr && !cant_repeat && prev_surface->sort.sort_key1 == surface->sort.sort_key1 && prev_surface->sort.sort_key2 == surface->sort.sort_key2 && repeats < RenderElementInfo::MAX_REPEATS) { @@ -3036,6 +3044,11 @@ RenderForwardClustered::RenderForwardClustered() { { defines += "\n#define MATERIAL_UNIFORM_SET " + itos(MATERIAL_UNIFORM_SET) + "\n"; } +#ifdef REAL_T_IS_DOUBLE + { + defines += "\n#define USE_DOUBLE_PRECISION \n"; + } +#endif scene_shader.init(defines); } diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index 75ccf1add5..1987577464 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -621,10 +621,10 @@ void SceneShaderForwardClustered::init(const String p_defines) { //shader compiler ShaderCompiler::DefaultIdentifierActions actions; - actions.renames["MODEL_MATRIX"] = "model_matrix"; + actions.renames["MODEL_MATRIX"] = "read_model_matrix"; actions.renames["MODEL_NORMAL_MATRIX"] = "model_normal_matrix"; actions.renames["VIEW_MATRIX"] = "scene_data.view_matrix"; - actions.renames["INV_VIEW_MATRIX"] = "scene_data.inv_view_matrix"; + actions.renames["INV_VIEW_MATRIX"] = "inv_view_matrix"; actions.renames["PROJECTION_MATRIX"] = "projection_matrix"; actions.renames["INV_PROJECTION_MATRIX"] = "inv_projection_matrix"; actions.renames["MODELVIEW_MATRIX"] = "modelview"; @@ -757,6 +757,8 @@ void SceneShaderForwardClustered::init(const String p_defines) { actions.usage_defines["RADIANCE"] = "#define CUSTOM_RADIANCE_USED\n"; actions.usage_defines["IRRADIANCE"] = "#define CUSTOM_IRRADIANCE_USED\n"; + actions.usage_defines["MODEL_MATRIX"] = "#define MODEL_MATRIX_USED\n"; + actions.render_mode_defines["skip_vertex_transform"] = "#define SKIP_TRANSFORM_USED\n"; actions.render_mode_defines["world_vertex_coords"] = "#define VERTEX_WORLD_COORDS_USED\n"; actions.render_mode_defines["ensure_correct_normals"] = "#define ENSURE_CORRECT_NORMALS\n"; diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index bea31dc927..3a4c62b301 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -1757,6 +1757,14 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr RendererRD::MaterialStorage::store_transform(Transform3D(), push_constant.transform); } +#ifdef REAL_T_IS_DOUBLE + // Split the origin into two components, the float approximation and the missing precision + // In the shader we will combine these back together to restore the lost precision. + RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &push_constant.transform[12], &push_constant.transform[3]); + RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &push_constant.transform[13], &push_constant.transform[7]); + RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &push_constant.transform[14], &push_constant.transform[11]); +#endif + push_constant.flags = inst->flags_cache; push_constant.gi_offset = inst->gi_offset_cache; push_constant.layer_mask = inst->layer_mask; @@ -2472,6 +2480,11 @@ RenderForwardMobile::RenderForwardMobile() { { defines += "\n#define MATERIAL_UNIFORM_SET " + itos(MATERIAL_UNIFORM_SET) + "\n"; } +#ifdef REAL_T_IS_DOUBLE + { + defines += "\n#define USE_DOUBLE_PRECISION \n"; + } +#endif scene_shader.init(defines); diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp index 383ed9247d..691d431b82 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp @@ -521,10 +521,10 @@ void SceneShaderForwardMobile::init(const String p_defines) { //shader compiler ShaderCompiler::DefaultIdentifierActions actions; - actions.renames["MODEL_MATRIX"] = "model_matrix"; + actions.renames["MODEL_MATRIX"] = "read_model_matrix"; actions.renames["MODEL_NORMAL_MATRIX"] = "model_normal_matrix"; actions.renames["VIEW_MATRIX"] = "scene_data.view_matrix"; - actions.renames["INV_VIEW_MATRIX"] = "scene_data.inv_view_matrix"; + actions.renames["INV_VIEW_MATRIX"] = "inv_view_matrix"; actions.renames["PROJECTION_MATRIX"] = "projection_matrix"; actions.renames["INV_PROJECTION_MATRIX"] = "inv_projection_matrix"; actions.renames["MODELVIEW_MATRIX"] = "modelview"; @@ -657,6 +657,8 @@ void SceneShaderForwardMobile::init(const String p_defines) { actions.usage_defines["RADIANCE"] = "#define CUSTOM_RADIANCE_USED\n"; actions.usage_defines["IRRADIANCE"] = "#define CUSTOM_IRRADIANCE_USED\n"; + actions.usage_defines["MODEL_MATRIX"] = "#define MODEL_MATRIX_USED\n"; + actions.render_mode_defines["skip_vertex_transform"] = "#define SKIP_TRANSFORM_USED\n"; actions.render_mode_defines["world_vertex_coords"] = "#define VERTEX_WORLD_COORDS_USED\n"; actions.render_mode_defines["ensure_correct_normals"] = "#define ENSURE_CORRECT_NORMALS\n"; diff --git a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl index d41474118d..adc4d9d01e 100644 --- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl @@ -129,12 +129,52 @@ invariant gl_Position; #GLOBALS +#ifdef USE_DOUBLE_PRECISION +// Helper functions for emulating double precision when adding floats. +vec3 quick_two_sum(vec3 a, vec3 b, out vec3 out_p) { + vec3 s = a + b; + out_p = b - (s - a); + return s; +} + +vec3 two_sum(vec3 a, vec3 b, out vec3 out_p) { + vec3 s = a + b; + vec3 v = s - a; + out_p = (a - (s - v)) + (b - v); + return s; +} + +vec3 double_add_vec3(vec3 base_a, vec3 prec_a, vec3 base_b, vec3 prec_b, out vec3 out_precision) { + vec3 s, t, se, te; + s = two_sum(base_a, base_b, se); + t = two_sum(prec_a, prec_b, te); + se += t; + s = quick_two_sum(s, se, se); + se += te; + s = quick_two_sum(s, se, out_precision); + return s; +} +#endif + void vertex_shader(in uint instance_index, in bool is_multimesh, in uint multimesh_offset, in SceneData scene_data, in mat4 model_matrix, out vec4 screen_pos) { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif + mat4 inv_view_matrix = scene_data.inv_view_matrix; + +#ifdef USE_DOUBLE_PRECISION + vec3 model_precision = vec3(model_matrix[0][3], model_matrix[1][3], model_matrix[2][3]); + model_matrix[0][3] = 0.0; + model_matrix[1][3] = 0.0; + model_matrix[2][3] = 0.0; + vec3 view_precision = vec3(inv_view_matrix[0][3], inv_view_matrix[1][3], inv_view_matrix[2][3]); + inv_view_matrix[0][3] = 0.0; + inv_view_matrix[1][3] = 0.0; + inv_view_matrix[2][3] = 0.0; +#endif + mat3 model_normal_matrix; if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { model_normal_matrix = transpose(inverse(mat3(model_matrix))); @@ -142,11 +182,12 @@ void vertex_shader(in uint instance_index, in bool is_multimesh, in uint multime model_normal_matrix = mat3(model_matrix); } + mat4 matrix; + mat4 read_model_matrix = model_matrix; + if (is_multimesh) { //multimesh, instances are for it - mat4 matrix; - #ifdef USE_PARTICLE_TRAILS uint trail_size = (instances.data[instance_index].flags >> INSTANCE_FLAGS_PARTICLE_TRAIL_SHIFT) & INSTANCE_FLAGS_PARTICLE_TRAIL_MASK; uint stride = 3 + 1 + 1; //particles always uses this format @@ -232,7 +273,14 @@ void vertex_shader(in uint instance_index, in bool is_multimesh, in uint multime #endif //transpose matrix = transpose(matrix); - model_matrix = model_matrix * matrix; +#if !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) || defined(MODEL_MATRIX_USED) + // Normally we can bake the multimesh transform into the model matrix, but when using double precision + // we avoid baking it in so we can emulate high precision. + read_model_matrix = model_matrix * matrix; +#if !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) + model_matrix = read_model_matrix; +#endif // !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) +#endif // !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) || defined(MODEL_MATRIX_USED) model_normal_matrix = model_normal_matrix * mat3(matrix); } @@ -297,7 +345,22 @@ void vertex_shader(in uint instance_index, in bool is_multimesh, in uint multime // using local coordinates (default) #if !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED) +#ifdef USE_DOUBLE_PRECISION + // We separate the basis from the origin becasue the basis is fine with single point precision. + // Then we combine the translations from the model matrix and the view matrix using emulated doubles. + // We add the result to the vertex and ignore the final lost precision. + vec3 model_origin = model_matrix[3].xyz; + if (is_multimesh) { + vertex = mat3(matrix) * vertex; + model_origin = double_add_vec3(model_origin, model_precision, matrix[3].xyz, vec3(0.0), model_precision); + } + vertex = mat3(model_matrix) * vertex; + vec3 temp_precision; // Will be ignored. + vertex += double_add_vec3(model_origin, model_precision, scene_data.inv_view_matrix[3].xyz, view_precision, temp_precision); + vertex = mat3(scene_data.view_matrix) * vertex; +#else vertex = (modelview * vec4(vertex, 1.0)).xyz; +#endif #ifdef NORMAL_USED normal = modelview_normal * normal; #endif @@ -490,7 +553,6 @@ layout(location = 10) in flat uint instance_index_interp; //defines to keep compatibility with vertex -#define model_matrix instances.data[draw_call.instance_index].transform #ifdef USE_MULTIVIEW #define projection_matrix scene_data.projection_matrix_view[ViewIndex] #define inv_projection_matrix scene_data.inv_projection_matrix_view[ViewIndex] @@ -737,6 +799,17 @@ void fragment_shader(in SceneData scene_data) { vec2 alpha_texture_coordinate = vec2(0.0, 0.0); #endif // ALPHA_ANTIALIASING_EDGE_USED + mat4 inv_view_matrix = scene_data.inv_view_matrix; + mat4 read_model_matrix = instances.data[instance_index].transform; +#ifdef USE_DOUBLE_PRECISION + read_model_matrix[0][3] = 0.0; + read_model_matrix[1][3] = 0.0; + read_model_matrix[2][3] = 0.0; + inv_view_matrix[0][3] = 0.0; + inv_view_matrix[1][3] = 0.0; + inv_view_matrix[2][3] = 0.0; +#endif + { #CODE : FRAGMENT } diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl index a109fd4d75..4e5c5b6c5b 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl @@ -123,6 +123,33 @@ invariant gl_Position; #define scene_data scene_data_block.data +#ifdef USE_DOUBLE_PRECISION +// Helper functions for emulating double precision when adding floats. +vec3 quick_two_sum(vec3 a, vec3 b, out vec3 out_p) { + vec3 s = a + b; + out_p = b - (s - a); + return s; +} + +vec3 two_sum(vec3 a, vec3 b, out vec3 out_p) { + vec3 s = a + b; + vec3 v = s - a; + out_p = (a - (s - v)) + (b - v); + return s; +} + +vec3 double_add_vec3(vec3 base_a, vec3 prec_a, vec3 base_b, vec3 prec_b, out vec3 out_precision) { + vec3 s, t, se, te; + s = two_sum(base_a, base_b, se); + t = two_sum(prec_a, prec_b, te); + se += t; + s = quick_two_sum(s, se, se); + se += te; + s = quick_two_sum(s, se, out_precision); + return s; +} +#endif + void main() { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) @@ -132,6 +159,17 @@ void main() { bool is_multimesh = bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH); mat4 model_matrix = draw_call.transform; + mat4 inv_view_matrix = scene_data.inv_view_matrix; +#ifdef USE_DOUBLE_PRECISION + vec3 model_precision = vec3(model_matrix[0][3], model_matrix[1][3], model_matrix[2][3]); + model_matrix[0][3] = 0.0; + model_matrix[1][3] = 0.0; + model_matrix[2][3] = 0.0; + vec3 view_precision = vec3(inv_view_matrix[0][3], inv_view_matrix[1][3], inv_view_matrix[2][3]); + inv_view_matrix[0][3] = 0.0; + inv_view_matrix[1][3] = 0.0; + inv_view_matrix[2][3] = 0.0; +#endif mat3 model_normal_matrix; if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { @@ -140,11 +178,12 @@ void main() { model_normal_matrix = mat3(model_matrix); } + mat4 matrix; + mat4 read_model_matrix = model_matrix; + if (is_multimesh) { //multimesh, instances are for it - mat4 matrix; - #ifdef USE_PARTICLE_TRAILS uint trail_size = (draw_call.flags >> INSTANCE_FLAGS_PARTICLE_TRAIL_SHIFT) & INSTANCE_FLAGS_PARTICLE_TRAIL_MASK; uint stride = 3 + 1 + 1; //particles always uses this format @@ -230,7 +269,15 @@ void main() { #endif //transpose matrix = transpose(matrix); - model_matrix = model_matrix * matrix; + +#if !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) || defined(MODEL_MATRIX_USED) + // Normally we can bake the multimesh transform into the model matrix, but when using double precision + // we avoid baking it in so we can emulate high precision. + read_model_matrix = model_matrix * matrix; +#if !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) + model_matrix = read_model_matrix; +#endif // !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) +#endif // !defined(USE_DOUBLE_PRECISION) || defined(SKIP_TRANSFORM_USED) || defined(VERTEX_WORLD_COORDS_USED) || defined(MODEL_MATRIX_USED) model_normal_matrix = model_normal_matrix * mat3(matrix); } @@ -297,7 +344,22 @@ void main() { // using local coordinates (default) #if !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED) +#ifdef USE_DOUBLE_PRECISION + // We separate the basis from the origin becasue the basis is fine with single point precision. + // Then we combine the translations from the model matrix and the view matrix using emulated doubles. + // We add the result to the vertex and ignore the final lost precision. + vec3 model_origin = model_matrix[3].xyz; + if (is_multimesh) { + vertex = mat3(matrix) * vertex; + model_origin = double_add_vec3(model_origin, model_precision, matrix[3].xyz, vec3(0.0), model_precision); + } + vertex = mat3(model_matrix) * vertex; + vec3 temp_precision; + vertex += double_add_vec3(model_origin, model_precision, scene_data.inv_view_matrix[3].xyz, view_precision, temp_precision); + vertex = mat3(scene_data.view_matrix) * vertex; +#else vertex = (modelview * vec4(vertex, 1.0)).xyz; +#endif #ifdef NORMAL_USED normal = modelview_normal * normal; #endif @@ -468,7 +530,6 @@ layout(location = 9) highp in float dp_clip; //defines to keep compatibility with vertex -#define model_matrix draw_call.transform #ifdef USE_MULTIVIEW #define projection_matrix scene_data.projection_matrix_view[ViewIndex] #define inv_projection_matrix scene_data.inv_projection_matrix_view[ViewIndex] @@ -685,6 +746,17 @@ void main() { vec2 alpha_texture_coordinate = vec2(0.0, 0.0); #endif // ALPHA_ANTIALIASING_EDGE_USED + mat4 inv_view_matrix = scene_data.inv_view_matrix; + mat4 read_model_matrix = draw_call.transform; +#ifdef USE_DOUBLE_PRECISION + read_model_matrix[0][3] = 0.0; + read_model_matrix[1][3] = 0.0; + read_model_matrix[2][3] = 0.0; + inv_view_matrix[0][3] = 0.0; + inv_view_matrix[1][3] = 0.0; + inv_view_matrix[2][3] = 0.0; +#endif + { #CODE : FRAGMENT } diff --git a/servers/rendering/renderer_rd/storage_rd/material_storage.h b/servers/rendering/renderer_rd/storage_rd/material_storage.h index db2e4cfa2a..2ce6550cc1 100644 --- a/servers/rendering/renderer_rd/storage_rd/material_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/material_storage.h @@ -311,6 +311,18 @@ public: } } + // http://andrewthall.org/papers/df64_qf128.pdf +#ifdef REAL_T_IS_DOUBLE + static _FORCE_INLINE_ void split_double(double a, float *ahi, float *alo) { + const double SPLITTER = (1 << 29) + 1; + double t = a * SPLITTER; + double thi = t - (t - a); + double tlo = a - thi; + *ahi = (float)thi; + *alo = (float)tlo; + } +#endif + /* Samplers */ _FORCE_INLINE_ RID sampler_rd_get_default(RS::CanvasItemTextureFilter p_filter, RS::CanvasItemTextureRepeat p_repeat) { diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp index f925f87cbe..7dd790d1da 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp @@ -59,6 +59,12 @@ void RenderSceneDataRD::update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p RendererRD::MaterialStorage::store_transform(cam_transform, ubo.inv_view_matrix); RendererRD::MaterialStorage::store_transform(cam_transform.affine_inverse(), ubo.view_matrix); +#ifdef REAL_T_IS_DOUBLE + RendererRD::MaterialStorage::split_double(-cam_transform.origin.x, &ubo.inv_view_matrix[12], &ubo.inv_view_matrix[3]); + RendererRD::MaterialStorage::split_double(-cam_transform.origin.y, &ubo.inv_view_matrix[13], &ubo.inv_view_matrix[7]); + RendererRD::MaterialStorage::split_double(-cam_transform.origin.z, &ubo.inv_view_matrix[14], &ubo.inv_view_matrix[11]); +#endif + for (uint32_t v = 0; v < view_count; v++) { projection = correction * view_projection[v]; RendererRD::MaterialStorage::store_camera(projection, ubo.projection_matrix_view[v]); |