diff options
79 files changed, 2871 insertions, 1427 deletions
diff --git a/SConstruct b/SConstruct index 3795fc1c3c..065019d591 100644 --- a/SConstruct +++ b/SConstruct @@ -105,15 +105,14 @@ if profile: opts = Variables(customs, ARGUMENTS) # Target build options -opts.Add("arch", "Platform-dependent architecture (arm/arm64/x86/x64/mips/...)", "") -opts.Add(EnumVariable("bits", "Target platform bits", "default", ("default", "32", "64"))) opts.Add("p", "Platform (alias for 'platform')", "") opts.Add("platform", "Target platform (%s)" % ("|".join(platform_list),), "") +opts.Add(BoolVariable("tools", "Build the tools (a.k.a. the Godot editor)", True)) opts.Add(EnumVariable("target", "Compilation target", "debug", ("debug", "release_debug", "release"))) +opts.Add("arch", "Platform-dependent architecture (arm/arm64/x86/x64/mips/...)", "") +opts.Add(EnumVariable("bits", "Target platform bits", "default", ("default", "32", "64"))) opts.Add(EnumVariable("optimize", "Optimization type", "speed", ("speed", "size"))) - -opts.Add(BoolVariable("tools", "Build the tools (a.k.a. 
the Godot editor)", True)) -opts.Add(BoolVariable("tests", "Build the unit tests", False)) +opts.Add(BoolVariable("production", "Set defaults to build Godot for use in production", False)) opts.Add(BoolVariable("use_lto", "Use link-time optimization", False)) # Components @@ -123,11 +122,12 @@ opts.Add(BoolVariable("xaudio2", "Enable the XAudio2 audio driver", False)) opts.Add("custom_modules", "A list of comma-separated directory paths containing custom modules to build.", "") # Advanced options -opts.Add(BoolVariable("verbose", "Enable verbose output for the compilation", False)) +opts.Add(BoolVariable("dev", "If yes, alias for verbose=yes warnings=extra werror=yes", False)) opts.Add(BoolVariable("progress", "Show a progress indicator during compilation", True)) +opts.Add(BoolVariable("tests", "Build the unit tests", False)) +opts.Add(BoolVariable("verbose", "Enable verbose output for the compilation", False)) opts.Add(EnumVariable("warnings", "Level of compilation warnings", "all", ("extra", "all", "moderate", "no"))) opts.Add(BoolVariable("werror", "Treat compiler warnings as errors", False)) -opts.Add(BoolVariable("dev", "If yes, alias for verbose=yes warnings=extra werror=yes", False)) opts.Add("extra_suffix", "Custom extra suffix added to the base filename of all generated binary files", "") opts.Add(BoolVariable("vsproj", "Generate a Visual Studio solution", False)) opts.Add(BoolVariable("disable_3d", "Disable 3D nodes for a smaller executable", False)) @@ -317,12 +317,34 @@ if selected_platform in platform_list: env.Tool("compilation_db") env.Alias("compiledb", env.CompilationDatabase()) + # 'dev' and 'production' are aliases to set default options if they haven't been set + # manually by the user. We use `ARGUMENTS.get()` to check if they were manually set. 
if env["dev"]: - env["verbose"] = True - env["warnings"] = "extra" - env["werror"] = True + env["verbose"] = ARGUMENTS.get("verbose", True) + env["warnings"] = ARGUMENTS.get("warnings", "extra") + env["werror"] = ARGUMENTS.get("werror", True) if env["tools"]: - env["tests"] = True + env["tests"] = ARGUMENTS.get("tests", True) + if env["production"]: + env["use_static_cpp"] = ARGUMENTS.get("use_static_cpp", True) + env["use_lto"] = ARGUMENTS.get("use_lto", True) + env["debug_symbols"] = ARGUMENTS.get("debug_symbols", False) + if not env["tools"] and env["target"] == "debug": + print( + "WARNING: Requested `production` build with `tools=no target=debug`, " + "this will give you a full debug template (use `target=release_debug` " + "for an optimized template with debug features)." + ) + if env.msvc: + print( + "WARNING: For `production` Windows builds, you should use MinGW with GCC " + "or Clang instead of Visual Studio, as they can better optimize the " + "GDScript VM in a very significant way. MSVC LTO also doesn't work " + "reliably for our use case." + "If you want to use MSVC nevertheless for production builds, set " + "`debug_symbols=no use_lto=no` instead of the `production=yes` option." + ) + Exit(255) env.extra_suffix = "" diff --git a/core/math/audio_frame.h b/core/math/audio_frame.h index 5773da9211..a5616b8d79 100644 --- a/core/math/audio_frame.h +++ b/core/math/audio_frame.h @@ -47,6 +47,9 @@ static inline float undenormalise(volatile float f) { return (v.i & 0x7f800000) < 0x08000000 ? 
0.0f : f; } +static const float AUDIO_PEAK_OFFSET = 0.0000000001f; +static const float AUDIO_MIN_PEAK_DB = -200.0f; // linear2db(AUDIO_PEAK_OFFSET) + struct AudioFrame { //left and right samples float l, r; diff --git a/core/variant/array.cpp b/core/variant/array.cpp index 48916f941e..9a2922a777 100644 --- a/core/variant/array.cpp +++ b/core/variant/array.cpp @@ -35,6 +35,7 @@ #include "core/object/script_language.h" #include "core/templates/hashfuncs.h" #include "core/templates/vector.h" +#include "core/variant/callable.h" #include "core/variant/variant.h" class ArrayPrivate { @@ -371,25 +372,22 @@ void Array::sort() { } struct _ArrayVariantSortCustom { - Object *obj = nullptr; - StringName func; + Callable func; _FORCE_INLINE_ bool operator()(const Variant &p_l, const Variant &p_r) const { const Variant *args[2] = { &p_l, &p_r }; Callable::CallError err; - bool res = obj->call(func, args, 2, err); - if (err.error != Callable::CallError::CALL_OK) { - res = false; - } + Variant res; + func.call(args, 2, res, err); + ERR_FAIL_COND_V_MSG(err.error != Callable::CallError::CALL_OK, false, + "Error calling sorting method: " + Variant::get_callable_error_text(func, args, 1, err)); return res; } }; -void Array::sort_custom(Object *p_obj, const StringName &p_function) { - ERR_FAIL_NULL(p_obj); +void Array::sort_custom(Callable p_callable) { SortArray<Variant, _ArrayVariantSortCustom, true> avs; - avs.compare.obj = p_obj; - avs.compare.func = p_function; + avs.compare.func = p_callable; avs.sort(_p->array.ptrw(), _p->array.size()); } @@ -438,13 +436,11 @@ int Array::bsearch(const Variant &p_value, bool p_before) { return bisect(_p->array, p_value, p_before, _ArrayVariantSort()); } -int Array::bsearch_custom(const Variant &p_value, Object *p_obj, const StringName &p_function, bool p_before) { +int Array::bsearch_custom(const Variant &p_value, Callable p_callable, bool p_before) { ERR_FAIL_COND_V(!_p->typed.validate(p_value, "custom binary search"), -1); - 
ERR_FAIL_NULL_V(p_obj, 0); _ArrayVariantSortCustom less; - less.obj = p_obj; - less.func = p_function; + less.func = p_callable; return bisect(_p->array, p_value, p_before, less); } diff --git a/core/variant/array.h b/core/variant/array.h index 26de5477b4..d8f2402330 100644 --- a/core/variant/array.h +++ b/core/variant/array.h @@ -37,6 +37,7 @@ class Variant; class ArrayPrivate; class Object; class StringName; +class Callable; class Array { mutable ArrayPrivate *_p; @@ -78,10 +79,10 @@ public: Variant back() const; void sort(); - void sort_custom(Object *p_obj, const StringName &p_function); + void sort_custom(Callable p_callable); void shuffle(); int bsearch(const Variant &p_value, bool p_before = true); - int bsearch_custom(const Variant &p_value, Object *p_obj, const StringName &p_function, bool p_before = true); + int bsearch_custom(const Variant &p_value, Callable p_callable, bool p_before = true); void invert(); int find(const Variant &p_value, int p_from = 0) const; diff --git a/core/variant/variant_call.cpp b/core/variant/variant_call.cpp index 2fd8134fbd..8f2cba138b 100644 --- a/core/variant/variant_call.cpp +++ b/core/variant/variant_call.cpp @@ -1298,10 +1298,10 @@ static void _register_variant_builtin_methods() { bind_method(Array, pop_back, sarray(), varray()); bind_method(Array, pop_front, sarray(), varray()); bind_method(Array, sort, sarray(), varray()); - bind_method(Array, sort_custom, sarray("obj", "func"), varray()); + bind_method(Array, sort_custom, sarray("func"), varray()); bind_method(Array, shuffle, sarray(), varray()); bind_method(Array, bsearch, sarray("value", "before"), varray(true)); - bind_method(Array, bsearch_custom, sarray("value", "obj", "func", "before"), varray(true)); + bind_method(Array, bsearch_custom, sarray("value", "func", "before"), varray(true)); bind_method(Array, invert, sarray(), varray()); bind_method(Array, duplicate, sarray("deep"), varray(false)); bind_method(Array, slice, sarray("begin", "end", "step", "deep"), 
varray(1, false)); diff --git a/doc/classes/Array.xml b/doc/classes/Array.xml index de3d89ee0f..cea5360234 100644 --- a/doc/classes/Array.xml +++ b/doc/classes/Array.xml @@ -191,11 +191,9 @@ </return> <argument index="0" name="value" type="Variant"> </argument> - <argument index="1" name="obj" type="Object"> + <argument index="1" name="func" type="Callable"> </argument> - <argument index="2" name="func" type="StringName"> - </argument> - <argument index="3" name="before" type="bool" default="true"> + <argument index="2" name="before" type="bool" default="true"> </argument> <description> Finds the index of an existing value (or the insertion index that maintains sorting order, if the value is not yet present in the array) using binary search and a custom comparison method. Optionally, a [code]before[/code] specifier can be passed. If [code]false[/code], the returned index comes after all existing entries of the value in the array. The custom method receives two arguments (an element from the array and the value searched for) and must return [code]true[/code] if the first argument is less than the second, and return [code]false[/code] otherwise. @@ -537,12 +535,10 @@ <method name="sort_custom"> <return type="void"> </return> - <argument index="0" name="obj" type="Object"> - </argument> - <argument index="1" name="func" type="StringName"> + <argument index="0" name="func" type="Callable"> </argument> <description> - Sorts the array using a custom method. The arguments are an object that holds the method and the name of such method. The custom method receives two arguments (a pair of elements from the array) and must return either [code]true[/code] or [code]false[/code]. + Sorts the array using a custom method. The custom method receives two arguments (a pair of elements from the array) and must return either [code]true[/code] or [code]false[/code]. [b]Note:[/b] you cannot randomize the return value as the heapsort algorithm expects a deterministic result. 
Doing so will result in unexpected behavior. [codeblocks] [gdscript] @@ -553,7 +549,7 @@ return false var my_items = [[5, "Potato"], [9, "Rice"], [4, "Tomato"]] - my_items.sort_custom(MyCustomSorter, "sort_ascending") + my_items.sort_custom(MyCustomSorter.sort_ascending) print(my_items) # Prints [[4, Tomato], [5, Potato], [9, Rice]]. [/gdscript] [csharp] diff --git a/doc/classes/AudioEffectCapture.xml b/doc/classes/AudioEffectCapture.xml new file mode 100644 index 0000000000..cf3d87c2e4 --- /dev/null +++ b/doc/classes/AudioEffectCapture.xml @@ -0,0 +1,75 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<class name="AudioEffectCapture" inherits="AudioEffect" version="4.0"> + <brief_description> + Captures audio from an audio bus in real-time. + </brief_description> + <description> + AudioEffectCapture is an AudioEffect which copies all audio frames from the attached audio effect bus into its internal ring buffer. + Application code should consume these audio frames from this ring buffer using [method get_buffer] and process it as needed, for example to capture data from a microphone, implement application defined effects, or to transmit audio over the network. + </description> + <tutorials> + </tutorials> + <methods> + <method name="can_get_buffer" qualifiers="const"> + <return type="bool"> + </return> + <argument index="0" name="frames" type="int"> + </argument> + <description> + Returns [code]true[/code] if at least [code]frames[/code] audio frames are available to read in the internal ring buffer. + </description> + </method> + <method name="clear_buffer"> + <return type="void"> + </return> + <description> + Clears the internal ring buffer. + </description> + </method> + <method name="get_buffer"> + <return type="PackedVector2Array"> + </return> + <argument index="0" name="frames" type="int"> + </argument> + <description> + Gets the next [code]frames[/code] audio samples from the internal ring buffer. 
+ Returns a [PackedVector2Array] containing exactly [code]frames[/code] audio samples if available, or an empty [PackedVector2Array] if insufficient data was available. + </description> + </method> + <method name="get_buffer_length_frames" qualifiers="const"> + <return type="int"> + </return> + <description> + Returns the total size of the internal ring buffer in frames. + </description> + </method> + <method name="get_discarded_frames" qualifiers="const"> + <return type="int"> + </return> + <description> + Returns the number of audio frames discarded from the audio bus due to full buffer. + </description> + </method> + <method name="get_frames_available" qualifiers="const"> + <return type="int"> + </return> + <description> + Returns the number of frames available to read using [method get_buffer]. + </description> + </method> + <method name="get_pushed_frames" qualifiers="const"> + <return type="int"> + </return> + <description> + Returns the number of audio frames inserted from the audio bus. + </description> + </method> + </methods> + <members> + <member name="buffer_length" type="float" setter="set_buffer_length" getter="get_buffer_length" default="0.1"> + Length of the internal ring buffer, in seconds. + </member> + </members> + <constants> + </constants> +</class> diff --git a/doc/classes/CodeHighlighter.xml b/doc/classes/CodeHighlighter.xml index 7a1dad547b..f078e4e5b0 100644 --- a/doc/classes/CodeHighlighter.xml +++ b/doc/classes/CodeHighlighter.xml @@ -1,8 +1,10 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="CodeHighlighter" inherits="SyntaxHighlighter" version="4.0"> <brief_description> + A syntax highlighter for code. </brief_description> <description> + A syntax highlighter for code. 
</description> <tutorials> </tutorials> @@ -10,15 +12,18 @@ <method name="add_color_region"> <return type="void"> </return> - <argument index="0" name="p_start_key" type="String"> + <argument index="0" name="start_key" type="String"> </argument> - <argument index="1" name="p_end_key" type="String"> + <argument index="1" name="end_key" type="String"> </argument> - <argument index="2" name="p_color" type="Color"> + <argument index="2" name="color" type="Color"> </argument> - <argument index="3" name="p_line_only" type="bool" default="false"> + <argument index="3" name="line_only" type="bool" default="false"> </argument> <description> + Adds a color region such as comments or strings. + Both the start and end keys must be symbols. Only the start key has to be unique. + Line only denotes if the region should continue until the end of the line or carry over on to the next line. If the end key is blank this is automatically set to [code]true[/code]. </description> </method> <method name="add_keyword_color"> @@ -29,6 +34,8 @@ <argument index="1" name="color" type="Color"> </argument> <description> + Sets the color for a keyword. + The keyword cannot contain any symbols except '_'. </description> </method> <method name="add_member_keyword_color"> @@ -39,24 +46,30 @@ <argument index="1" name="color" type="Color"> </argument> <description> + Sets the color for a member keyword. + The member keyword cannot contain any symbols except '_'. + It will not be highlighted if preceded by a '.'. </description> </method> <method name="clear_color_regions"> <return type="void"> </return> <description> + Removes all color regions. </description> </method> <method name="clear_keyword_colors"> <return type="void"> </return> <description> + Removes all keywords. </description> </method> <method name="clear_member_keyword_colors"> <return type="void"> </return> <description> + Removes all member keywords. 
</description> </method> <method name="get_keyword_color" qualifiers="const"> @@ -65,6 +78,7 @@ <argument index="0" name="keyword" type="String"> </argument> <description> + Returns the color for a keyword. </description> </method> <method name="get_member_keyword_color" qualifiers="const"> @@ -73,14 +87,16 @@ <argument index="0" name="member_keyword" type="String"> </argument> <description> + Returns the color for a member keyword. </description> </method> <method name="has_color_region" qualifiers="const"> <return type="bool"> </return> - <argument index="0" name="p_start_key" type="String"> + <argument index="0" name="start_key" type="String"> </argument> <description> + Returns [code]true[/code] if the start key exists, else [code]false[/code]. </description> </method> <method name="has_keyword_color" qualifiers="const"> @@ -89,6 +105,7 @@ <argument index="0" name="keyword" type="String"> </argument> <description> + Returns [code]true[/code] if the keyword exists, else [code]false[/code]. </description> </method> <method name="has_member_keyword_color" qualifiers="const"> @@ -97,14 +114,16 @@ <argument index="0" name="member_keyword" type="String"> </argument> <description> + Returns [code]true[/code] if the member keyword exists, else [code]false[/code]. </description> </method> <method name="remove_color_region"> <return type="void"> </return> - <argument index="0" name="p_start_key" type="String"> + <argument index="0" name="start_key" type="String"> </argument> <description> + Removes the color region that uses that start key. </description> </method> <method name="remove_keyword_color"> @@ -113,6 +132,7 @@ <argument index="0" name="keyword" type="String"> </argument> <description> + Removes the keyword. </description> </method> <method name="remove_member_keyword_color"> @@ -121,23 +141,31 @@ <argument index="0" name="member_keyword" type="String"> </argument> <description> + Removes the member keyword. 
</description> </method> </methods> <members> <member name="color_regions" type="Dictionary" setter="set_color_regions" getter="get_color_regions" default="{}"> + Sets the color regions. All existing regions will be removed. The [Dictionary] key is the region start and end key, separated by a space. The value is the region color. </member> <member name="function_color" type="Color" setter="set_function_color" getter="get_function_color" default="Color( 0, 0, 0, 1 )"> + Sets the color for functions. A function is a non-keyword string followed by a '('. </member> <member name="keyword_colors" type="Dictionary" setter="set_keyword_colors" getter="get_keyword_colors" default="{}"> + Sets the keyword colors. All existing keywords will be removed. The [Dictionary] key is the keyword. The value is the keyword color. </member> <member name="member_keyword_colors" type="Dictionary" setter="set_member_keyword_colors" getter="get_member_keyword_colors" default="{}"> + Sets the member keyword colors. All existing member keywords will be removed. The [Dictionary] key is the member keyword. The value is the member keyword color. </member> <member name="member_variable_color" type="Color" setter="set_member_variable_color" getter="get_member_variable_color" default="Color( 0, 0, 0, 1 )"> + Sets the color for member variables. A member variable is a non-keyword, non-function string preceded by a '.'. </member> <member name="number_color" type="Color" setter="set_number_color" getter="get_number_color" default="Color( 0, 0, 0, 1 )"> + Sets the color for numbers. </member> <member name="symbol_color" type="Color" setter="set_symbol_color" getter="get_symbol_color" default="Color( 0, 0, 0, 1 )"> + Sets the color for symbols. 
</member> </members> <constants> diff --git a/doc/classes/EditorSyntaxHighlighter.xml b/doc/classes/EditorSyntaxHighlighter.xml index 103d95e1d6..b80e81928f 100644 --- a/doc/classes/EditorSyntaxHighlighter.xml +++ b/doc/classes/EditorSyntaxHighlighter.xml @@ -1,8 +1,11 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="EditorSyntaxHighlighter" inherits="SyntaxHighlighter" version="4.0"> <brief_description> + Base syntax highlighter resource for the [ScriptEditor]. </brief_description> <description> + Base syntax highlighter resource that all editor syntax highlighters extend from. It is used in the [ScriptEditor]. + Add a syntax highlighter to an individual script by calling [method ScriptEditorBase.add_syntax_highlighter]. To apply to all scripts on open, call [method ScriptEditor.register_syntax_highlighter]. </description> <tutorials> </tutorials> @@ -11,18 +14,21 @@ <return type="String"> </return> <description> + Virtual method which can be overridden to return the syntax highlighter name. </description> </method> <method name="_get_supported_extentions" qualifiers="virtual"> <return type="Array"> </return> <description> + Virtual method which can be overridden to return the supported file extensions. </description> </method> <method name="_get_supported_languages" qualifiers="virtual"> <return type="Array"> </return> <description> + Virtual method which can be overridden to return the supported language names. </description> </method> </methods> diff --git a/doc/classes/File.xml b/doc/classes/File.xml index 2f7ac551cf..ff03f44789 100644 --- a/doc/classes/File.xml +++ b/doc/classes/File.xml @@ -481,8 +481,9 @@ </methods> <members> <member name="endian_swap" type="bool" setter="set_endian_swap" getter="get_endian_swap" default="false"> - If [code]true[/code], the file's endianness is swapped. Use this if you're dealing with files written on big-endian machines. - [b]Note:[/b] This is about the file format, not CPU type. 
This is always reset to [code]false[/code] whenever you open the file. + If [code]true[/code], the file is read with big-endian [url=https://en.wikipedia.org/wiki/Endianness]endianness[/url]. If [code]false[/code], the file is read with little-endian endianness. If in doubt, leave this to [code]false[/code] as most files are written with little-endian endianness. + [b]Note:[/b] [member endian_swap] is only about the file format, not the CPU type. The CPU endianness doesn't affect the default endianness for files written. + [b]Note:[/b] This is always reset to [code]false[/code] whenever you open the file. Therefore, you must set [member endian_swap] [i]after[/i] opening the file, not before. </member> </members> <constants> diff --git a/doc/classes/ScriptEditor.xml b/doc/classes/ScriptEditor.xml index d5a32dd20c..28620bd29b 100644 --- a/doc/classes/ScriptEditor.xml +++ b/doc/classes/ScriptEditor.xml @@ -37,6 +37,7 @@ <return type="ScriptEditorBase"> </return> <description> + Returns the [ScriptEditorBase] object that the user is currently editing. </description> </method> <method name="get_current_script"> @@ -60,6 +61,7 @@ <return type="Array"> </return> <description> + Returns an array with all [ScriptEditorBase] objects which are currently open in editor. </description> </method> <method name="get_open_scripts" qualifiers="const"> @@ -95,6 +97,8 @@ <argument index="0" name="syntax_highlighter" type="EditorSyntaxHighlighter"> </argument> <description> + Registers the [EditorSyntaxHighlighter] to the editor, the [EditorSyntaxHighlighter] will be available on all open scripts. + [b]Note:[/b] Does not apply to scripts that are already opened. </description> </method> <method name="unregister_syntax_highlighter"> @@ -103,6 +107,8 @@ <argument index="0" name="syntax_highlighter" type="EditorSyntaxHighlighter"> </argument> <description> + Unregisters the [EditorSyntaxHighlighter] from the editor. 
+ [b]Note:[/b] The [EditorSyntaxHighlighter] will still be applied to scripts that are already opened. </description> </method> </methods> diff --git a/doc/classes/ScriptEditorBase.xml b/doc/classes/ScriptEditorBase.xml index 9968ae06c3..ee498de302 100644 --- a/doc/classes/ScriptEditorBase.xml +++ b/doc/classes/ScriptEditorBase.xml @@ -1,8 +1,10 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="ScriptEditorBase" inherits="VBoxContainer" version="4.0"> <brief_description> + Base editor for editing scripts in the [ScriptEditor]. </brief_description> <description> + Base editor for editing scripts in the [ScriptEditor]. This does not include documentation items. </description> <tutorials> </tutorials> @@ -13,34 +15,40 @@ <argument index="0" name="highlighter" type="Object"> </argument> <description> + Adds an [EditorSyntaxHighlighter] to the open script. </description> </method> </methods> <signals> <signal name="edited_script_changed"> <description> + Emitted after script validation. For visual scripts, it is emitted on modification. </description> </signal> <signal name="go_to_help"> <argument index="0" name="what" type="String"> </argument> <description> + Emitted when the user requests a specific documentation page. </description> </signal> <signal name="name_changed"> <description> + Emitted after script validation or when the edited resource has changed. Not used by visual scripts. </description> </signal> <signal name="replace_in_files_requested"> <argument index="0" name="text" type="String"> </argument> <description> + Emitted when the user requests to find and replace text in the file system. Not used by visual scripts. </description> </signal> <signal name="request_help"> <argument index="0" name="topic" type="String"> </argument> <description> + Emitted when the user requests contextual help. 
</description> </signal> <signal name="request_open_script_at_line"> @@ -49,16 +57,19 @@ <argument index="1" name="line" type="int"> </argument> <description> + Emitted when the user requests a script. </description> </signal> <signal name="request_save_history"> <description> + Emitted when the user performs a contextual goto and the item is in the same script. </description> </signal> <signal name="search_in_files_requested"> <argument index="0" name="text" type="String"> </argument> <description> + Emitted when the user requests to search text in the file system. Not used by visual scripts. </description> </signal> </signals> diff --git a/doc/classes/SyntaxHighlighter.xml b/doc/classes/SyntaxHighlighter.xml index 2d6e3de02a..3f87e4f61d 100644 --- a/doc/classes/SyntaxHighlighter.xml +++ b/doc/classes/SyntaxHighlighter.xml @@ -1,50 +1,83 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="SyntaxHighlighter" inherits="Resource" version="4.0"> <brief_description> + Base syntax highlighter resource for [TextEdit]. </brief_description> <description> + Base syntax highlighter resource that all syntax highlighters extend from. It provides syntax highlighting data to [TextEdit]. + The associated [TextEdit] node will call into the [SyntaxHighlighter] on an as-needed basis. + [b]Note:[/b] Each syntax highlighter instance should not be shared across multiple [TextEdit] nodes. </description> <tutorials> </tutorials> <methods> + <method name="_clear_highlighting_cache" qualifiers="virtual"> + <return type="void"> + </return> + <description> + Virtual method which can be overridden to clear any local caches. + </description> + </method> <method name="_get_line_syntax_highlighting" qualifiers="virtual"> <return type="Dictionary"> </return> - <argument index="0" name="p_line" type="int"> + <argument index="0" name="line" type="int"> </argument> <description> + Virtual method which can be overridden to return syntax highlighting data. + See [method get_line_syntax_highlighting] for more details. 
</description> </method> <method name="_update_cache" qualifiers="virtual"> <return type="void"> </return> <description> + Virtual method which can be overridden to update any local caches. </description> </method> <method name="clear_highlighting_cache"> <return type="void"> </return> <description> + Clears all cached syntax highlighting data. + Then calls overridable method [method _clear_highlighting_cache]. </description> </method> <method name="get_line_syntax_highlighting"> <return type="Dictionary"> </return> - <argument index="0" name="p_line" type="int"> + <argument index="0" name="line" type="int"> </argument> <description> + Returns syntax highlighting data for a single line. If the line is not cached, calls [method _get_line_syntax_highlighting] to calculate the data. + The return [Dictionary] is column number to [Dictionary]. The column number notes the start of a region, the region will end if another region is found, or at the end of the line. The nested [Dictionary] contains the data for that region, currently only the key "color" is supported. + [b]Example return:[/b] + [codeblock] + var color_map = { + 0: { + "color": Color(1, 0, 0) + }, + 5: { + "color": Color(0, 1, 0) + } + } + [/codeblock] + This will color columns 0-4 red, and columns 5-eol in green. </description> </method> <method name="get_text_edit"> <return type="TextEdit"> </return> <description> + Returns the associated [TextEdit] node. </description> </method> <method name="update_cache"> <return type="void"> </return> <description> + Clears then updates the [SyntaxHighlighter] caches. Override [method _update_cache] for a callback. + [b]Note:[/b] This is called automatically when the associated [TextEdit] node, updates its own cache. 
</description> </method> </methods> diff --git a/doc/classes/TextEdit.xml b/doc/classes/TextEdit.xml index af4543374a..539f7afbd8 100644 --- a/doc/classes/TextEdit.xml +++ b/doc/classes/TextEdit.xml @@ -725,6 +725,7 @@ Set additional options for BiDi override. </member> <member name="syntax_highlighter" type="SyntaxHighlighter" setter="set_syntax_highlighter" getter="get_syntax_highlighter"> + Sets the [SyntaxHighlighter] to use. </member> <member name="text" type="String" setter="set_text" getter="get_text" default=""""> String value of the [TextEdit]. @@ -914,7 +915,7 @@ </constants> <theme_items> <theme_item name="background_color" type="Color" default="Color( 0, 0, 0, 0 )"> - Sets the background [Color] of this [TextEdit]. [member syntax_highlighting] has to be enabled. + Sets the background [Color] of this [TextEdit]. </theme_item> <theme_item name="brace_mismatch_color" type="Color" default="Color( 1, 0.2, 0.2, 1 )"> </theme_item> diff --git a/drivers/dummy/rasterizer_dummy.h b/drivers/dummy/rasterizer_dummy.h index 72ab18d115..e69f36e16f 100644 --- a/drivers/dummy/rasterizer_dummy.h +++ b/drivers/dummy/rasterizer_dummy.h @@ -566,6 +566,7 @@ public: AABB mesh_get_custom_aabb(RID p_mesh) const override { return AABB(); } AABB mesh_get_aabb(RID p_mesh, RID p_skeleton = RID()) override { return AABB(); } + void mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh) override {} void mesh_clear(RID p_mesh) override {} /* MULTIMESH API */ diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index ef331ec4b6..9584dd3f67 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -74,11 +74,13 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID } else if (texture_buffer_owner.owns(p_buffer)) { if (p_post_barrier & BARRIER_MASK_RASTER) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + r_access_mask 
|= VK_ACCESS_SHADER_READ_BIT; } if (p_post_barrier & BARRIER_MASK_COMPUTE) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT; } - r_access_mask |= VK_ACCESS_SHADER_READ_BIT; + buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; } else if (storage_buffer_owner.owns(p_buffer)) { buffer = storage_buffer_owner.getornull(p_buffer); @@ -1627,6 +1629,9 @@ void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mas mem_barrier.srcAccessMask = p_src_access; mem_barrier.dstAccessMask = p_dst_sccess; + if (p_src_stage_mask == 0 || p_dst_stage_mask == 0) { + return; //no barrier, since this is invalid + } vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 1, &mem_barrier, 0, nullptr, 0, nullptr); } @@ -2477,6 +2482,10 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; @@ -2496,6 +2505,13 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } + if (texture->used_in_frame != frames_drawn) { + texture->used_in_raster = false; + texture->used_in_compute = false; + texture->used_in_frame = frames_drawn; + } + texture->used_in_transfer = true; + return OK; } @@ -2844,6 +2860,10 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + { //restore src 
VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -3011,6 +3031,10 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + { //restore src VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -3143,6 +3167,10 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; @@ -3163,6 +3191,13 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } + if (src_tex->used_in_frame != frames_drawn) { + src_tex->used_in_raster = false; + src_tex->used_in_compute = false; + src_tex->used_in_frame = frames_drawn; + } + src_tex->used_in_transfer = true; + return OK; } @@ -3289,6 +3324,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF dependency_from_external.srcStageMask |= reading_stages; } } break; + case INITIAL_ACTION_CLEAR_REGION_CONTINUE: case INITIAL_ACTION_CONTINUE: { if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -3296,7 +3332,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - 
description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there + description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; @@ -3425,8 +3461,13 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF render_pass_create_info.pAttachments = attachments.ptr(); render_pass_create_info.subpassCount = 1; render_pass_create_info.pSubpasses = &subpass; - render_pass_create_info.dependencyCount = 2; - render_pass_create_info.pDependencies = dependencies; + // Commenting this because it seems it just avoids raster and compute to work at the same time. + // Other barriers seem to be protecting the render pass fine. + // render_pass_create_info.dependencyCount = 2; + // render_pass_create_info.pDependencies = dependencies; + + render_pass_create_info.dependencyCount = 0; + render_pass_create_info.pDependencies = nullptr; VkRenderPass render_pass; VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass); @@ -4108,6 +4149,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages bool is_compute = false; + uint32_t compute_local_size[3] = { 0, 0, 0 }; + for (int i = 0; i < p_stages.size(); i++) { if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) { is_compute = true; @@ -4124,6 +4167,11 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed parsing shader."); + if (is_compute) { + compute_local_size[0] = module.entry_points->local_size.x; + compute_local_size[1] = module.entry_points->local_size.y; + compute_local_size[2] = module.entry_points->local_size.z; + } uint32_t binding_count = 0; result = 
spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -4328,6 +4376,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } } } + uint32_t pc_count = 0; result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -4376,6 +4425,9 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages shader.fragment_outputs = fragment_outputs; shader.push_constant = push_constant; shader.is_compute = is_compute; + shader.compute_local_size[0] = compute_local_size[0]; + shader.compute_local_size[1] = compute_local_size[1]; + shader.compute_local_size[2] = compute_local_size[2]; String error_text; @@ -5216,7 +5268,14 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) { + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); + } + #endif return err; } @@ -5255,7 +5314,12 @@ Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint3 #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_post_barrier); + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, 
VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, dst_stage_mask); + #endif return OK; } @@ -5710,6 +5774,9 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) { pipeline.pipeline_layout = shader->pipeline_layout; pipeline.shader = p_shader; pipeline.push_constant_size = shader->push_constant.push_constant_size; + pipeline.local_group_size[0] = shader->compute_local_size[0]; + pipeline.local_group_size[1] = shader->compute_local_size[1]; + pipeline.local_group_size[2] = shader->compute_local_size[2]; //create ID to associate with this pipeline RID id = compute_pipeline_owner.make_rid(pipeline); @@ -6019,7 +6086,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); - ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time."); Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer); ERR_FAIL_COND_V(!framebuffer, INVALID_ID); @@ -6040,7 +6107,14 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu viewport_offset = regioni.position; viewport_size = regioni.size; - + if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { + needs_clear_color = true; + p_initial_color_action = INITIAL_ACTION_CONTINUE; + } + if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { + needs_clear_depth = true; + p_initial_depth_action = INITIAL_ACTION_CONTINUE; + } if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) { needs_clear_color = true; p_initial_color_action = INITIAL_ACTION_KEEP; @@ -6388,6 +6462,19 @@ void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_ 
dl->state.sets[p_index].uniform_set_format = uniform_set->format; dl->state.sets[p_index].uniform_set = p_uniform_set; + uint32_t mst_count = uniform_set->mutable_storage_textures.size(); + if (mst_count) { + Texture **mst_textures = const_cast<UniformSet *>(uniform_set)->mutable_storage_textures.ptrw(); + for (uint32_t i = 0; i < mst_count; i++) { + if (mst_textures[i]->used_in_frame != frames_drawn) { + mst_textures[i]->used_in_frame = frames_drawn; + mst_textures[i]->used_in_transfer = false; + mst_textures[i]->used_in_compute = false; + } + mst_textures[i]->used_in_raster = true; + } + } + #ifdef DEBUG_ENABLED { //validate that textures bound are not attached as framebuffer bindings uint32_t attachable_count = uniform_set->attachable_textures.size(); @@ -6673,23 +6760,43 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } if (p_post_barrier & BARRIER_MASK_RASTER) { - barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; - access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/; } if (p_post_barrier & BARRIER_MASK_TRANSFER) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + draw_list_bound_textures.clear(); - for (int i = 0; i 
< draw_list_storage_textures.size(); i++) { + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = draw_list_storage_textures.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * draw_list_storage_textures.size()); + } + + uint32_t src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + uint32_t src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + + if (image_barrier_count) { + src_stage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + src_access |= VK_ACCESS_SHADER_WRITE_BIT; + } + + for (uint32_t i = 0; i < image_barrier_count; i++) { Texture *texture = texture_owner.getornull(draw_list_storage_textures[i]); - VkImageMemoryBarrier image_memory_barrier; + VkImageMemoryBarrier &image_memory_barrier = image_barriers[i]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.srcAccessMask = src_access; image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = texture->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6703,8 +6810,6 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; image_memory_barrier.subresourceRange.layerCount = texture->layers; - vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6717,7 +6822,17 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { #ifdef 
FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, barrier_flags, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, access_flags, true); + + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = src_access; + mem_barrier.dstAccessMask = access_flags; + + if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, src_stage, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } + #endif } @@ -6725,12 +6840,13 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { /**** COMPUTE LISTS ****/ /***********************/ -RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() { - ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); +RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin(bool p_allow_draw_overlap) { + ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); compute_list = memnew(ComputeList); compute_list->command_buffer = frames[frame].draw_command_buffer; + compute_list->state.allow_draw_overlap = p_allow_draw_overlap; return ID_TYPE_COMPUTE_LIST; } @@ -6787,6 +6903,9 @@ void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_l } cl->state.pipeline_shader = pipeline->shader; + cl->state.local_group_size[0] = pipeline->local_group_size[0]; + cl->state.local_group_size[1] = pipeline->local_group_size[1]; + cl->state.local_group_size[2] = pipeline->local_group_size[2]; } #ifdef DEBUG_ENABLED @@ 
-6824,11 +6943,24 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, cl->state.sets[p_index].uniform_set = p_uniform_set; uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size(); + uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); + Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw(); + VkImageMemoryBarrier *texture_barriers = nullptr; + + if (textures_to_sampled_count + textures_to_storage_count) { + texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count)); + } + uint32_t texture_barrier_count = 0; + + uint32_t src_stage_flags = 0; + for (uint32_t i = 0; i < textures_to_sampled_count; i++) { if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - VkImageMemoryBarrier image_memory_barrier; + src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -6845,23 +6977,55 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]); } + + if (textures_to_sampled[i]->used_in_frame != 
frames_drawn) { + textures_to_sampled[i]->used_in_frame = frames_drawn; + textures_to_sampled[i]->used_in_transfer = false; + textures_to_sampled[i]->used_in_raster = false; + } + textures_to_sampled[i]->used_in_compute = true; } - uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw(); for (uint32_t i = 0; i < textures_to_storage_count; i++) { if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) { - VkImageMemoryBarrier image_memory_barrier; + uint32_t src_access_flags = 0; + + if (textures_to_storage[i]->used_in_frame == frames_drawn) { + if (textures_to_storage[i]->used_in_compute) { + src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (textures_to_storage[i]->used_in_raster) { + src_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (textures_to_storage[i]->used_in_transfer) { + src_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_raster = false; + textures_to_storage[i]->used_in_compute = false; + + } else { + src_access_flags = 0; + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_raster = false; + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_frame = frames_drawn; + } + + VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + 
image_memory_barrier.srcAccessMask = src_access_flags; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; image_memory_barrier.oldLayout = textures_to_storage[i]->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; @@ -6875,14 +7039,20 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_storage[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL; cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards } } + if (texture_barrier_count) { + if (src_stage_flags == 0) { + src_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + + vkCmdPipelineBarrier(cl->command_buffer, src_stage_flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, texture_barrier_count, texture_barriers); + } + #if 0 { //validate that textures bound are not attached as framebuffer bindings uint32_t attachable_count = uniform_set->attachable_textures.size(); @@ -6976,6 +7146,27 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups); } +void RenderingDeviceVulkan::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) { + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); + ERR_FAIL_COND(!compute_list); + + ComputeList *cl = compute_list; + +#ifdef DEBUG_ENABLED + + ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw."); + + if 
(cl->validation.pipeline_push_constant_size > 0) { + //using push constants, check that they were supplied + ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } + +#endif + + compute_list_dispatch(p_list, (p_x_threads - 1) / cl->state.local_group_size[0] + 1, (p_y_threads - 1) / cl->state.local_group_size[1] + 1, (p_z_threads - 1) / cl->state.local_group_size[2] + 1); +} + void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) { ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_COND(!compute_list); @@ -7047,7 +7238,7 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { uint32_t access_flags = 0; if (p_post_barrier & BARRIER_MASK_COMPUTE) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } if (p_post_barrier & BARRIER_MASK_RASTER) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; @@ -7058,8 +7249,22 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count); + } + + uint32_t barrier_idx = 0; + for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) { 
- VkImageMemoryBarrier image_memory_barrier; + VkImageMemoryBarrier &image_memory_barrier = image_barriers[barrier_idx++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -7076,19 +7281,33 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer; image_memory_barrier.subresourceRange.layerCount = E->get()->layers; - // TODO: Look at the usages in the compute list and determine tighter dst stage and access masks based on some "final" usage equivalent - vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + if (E->get()->used_in_frame != frames_drawn) { + E->get()->used_in_transfer = false; + E->get()->used_in_raster = false; + E->get()->used_in_compute = false; + E->get()->used_in_frame = frames_drawn; + } } - memdelete(compute_list); - compute_list = nullptr; #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + mem_barrier.dstAccessMask = access_flags; + + if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } + #endif + 
+ memdelete(compute_list); + compute_list = nullptr; } void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { @@ -7107,11 +7326,15 @@ void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (p_from == 0) { + src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + uint32_t dst_barrier_flags = 0; uint32_t dst_access_flags = 0; if (p_to & BARRIER_MASK_COMPUTE) { dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } if (p_to & BARRIER_MASK_RASTER) { dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; @@ -7122,6 +7345,10 @@ void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } + if (p_to == 0) { + dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + _memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true); } @@ -7325,6 +7552,16 @@ void RenderingDeviceVulkan::draw_command_end_label() { context->command_end_label(frames[frame].draw_command_buffer); } +String RenderingDeviceVulkan::get_device_vendor_name() const { + return context->get_device_vendor_name(); +} +String RenderingDeviceVulkan::get_device_name() const { + return context->get_device_name(); +} +String RenderingDeviceVulkan::get_device_pipeline_cache_uuid() const { + return context->get_device_pipeline_cache_uuid(); +} + void RenderingDeviceVulkan::_finalize_command_bufers() { if (draw_list) { ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); @@ -7377,6 +7614,7 @@ void 
RenderingDeviceVulkan::_begin_frame() { if (frames[frame].timestamp_count) { vkGetQueryPoolResults(device, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count, sizeof(uint64_t) * max_timestamp_query_elements, frames[frame].timestamp_result_values, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); + vkCmdResetQueryPool(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count); SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names); SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values); } diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 4bea17e4a1..a2527d5c33 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -141,6 +141,11 @@ class RenderingDeviceVulkan : public RenderingDevice { VkImageLayout layout; + uint64_t used_in_frame = 0; + bool used_in_transfer = false; + bool used_in_raster = false; + bool used_in_compute = false; + uint32_t read_aspect_mask = 0; uint32_t barrier_aspect_mask = 0; bool bound = false; //bound to framebffer @@ -528,6 +533,8 @@ class RenderingDeviceVulkan : public RenderingDevice { PushConstant push_constant; + uint32_t compute_local_size[3] = { 0, 0, 0 }; + bool is_compute = false; int max_output = 0; Vector<Set> sets; @@ -686,6 +693,7 @@ class RenderingDeviceVulkan : public RenderingDevice { VkPipeline pipeline = VK_NULL_HANDLE; uint32_t push_constant_size = 0; uint32_t push_constant_stages = 0; + uint32_t local_group_size[3] = { 0, 0, 0 }; }; RID_Owner<ComputePipeline, true> compute_pipeline_owner; @@ -808,8 +816,10 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t set_count = 0; RID pipeline; RID pipeline_shader; + uint32_t local_group_size[3] = { 0, 0, 0 }; VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; uint32_t pipeline_push_constant_stages = 0; + bool allow_draw_overlap; } state; #ifdef DEBUG_ENABLED @@ -1028,13 
+1038,14 @@ public: /**** COMPUTE LISTS ****/ /***********************/ - virtual ComputeListID compute_list_begin(); + virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false); virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline); virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index); virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size); virtual void compute_list_add_barrier(ComputeListID p_list); virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads); virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); @@ -1085,6 +1096,10 @@ public: virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)); virtual void draw_command_end_label(); + virtual String get_device_vendor_name() const; + virtual String get_device_name() const; + virtual String get_device_pipeline_cache_uuid() const; + RenderingDeviceVulkan(); ~RenderingDeviceVulkan(); }; diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 98966477a5..c564cee757 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -380,7 +380,8 @@ Error VulkanContext::_create_physical_device() { ERR_FAIL_V(ERR_CANT_CREATE); } /* for now, just grab the first physical device */ - gpu = physical_devices[0]; + uint32_t device_index = 0; + gpu = physical_devices[device_index]; free(physical_devices); /* Look for device extensions */ @@ -389,6 +390,40 @@ Error VulkanContext::_create_physical_device() { enabled_extension_count = 0; memset(extension_names, 
0, sizeof(extension_names)); + /* Get identifier properties */ + vkGetPhysicalDeviceProperties(gpu, &gpu_props); + + static const struct { + uint32_t id; + const char *name; + } vendor_names[] = { + { 0x1002, "AMD" }, + { 0x1010, "ImgTec" }, + { 0x10DE, "NVIDIA" }, + { 0x13B5, "ARM" }, + { 0x5143, "Qualcomm" }, + { 0x8086, "INTEL" }, + { 0, nullptr }, + }; + device_name = gpu_props.deviceName; + pipeline_cache_id = String::hex_encode_buffer(gpu_props.pipelineCacheUUID, VK_UUID_SIZE); + pipeline_cache_id += "-driver-" + itos(gpu_props.driverVersion); + { + device_vendor = "Unknown"; + uint32_t vendor_idx = 0; + while (vendor_names[vendor_idx].name != nullptr) { + if (gpu_props.vendorID == vendor_names[vendor_idx].id) { + device_vendor = vendor_names[vendor_idx].name; + break; + } + vendor_idx++; + } + } +#ifdef DEBUG_ENABLED + print_line("Using Vulkan Device #" + itos(device_index) + ": " + device_vendor + " - " + device_name); +#endif + device_api_version = gpu_props.apiVersion; + err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); @@ -498,7 +533,6 @@ Error VulkanContext::_create_physical_device() { break; } } - vkGetPhysicalDeviceProperties(gpu, &gpu_props); /* Call with NULL data to get count */ vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr); @@ -565,6 +599,7 @@ Error VulkanContext::_create_device() { } err = vkCreateDevice(gpu, &sdevice, nullptr, &device); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + return OK; } @@ -1590,11 +1625,12 @@ void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String if (!enabled_debug_utils) { return; } + + CharString cs = p_label_name.utf8().get_data(); VkDebugUtilsLabelEXT label; label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; label.pNext = nullptr; - CharString label_name = p_label_name.utf8(); - label.pLabelName = label_name.get_data(); + label.pLabelName = cs.get_data(); label.color[0] = 
p_color[0]; label.color[1] = p_color[1]; label.color[2] = p_color[2]; @@ -1606,11 +1642,11 @@ void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, Strin if (!enabled_debug_utils) { return; } + CharString cs = p_label_name.utf8().get_data(); VkDebugUtilsLabelEXT label; label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; label.pNext = nullptr; - CharString label_name = p_label_name.utf8(); - label.pLabelName = label_name.get_data(); + label.pLabelName = cs.get_data(); label.color[0] = p_color[0]; label.color[1] = p_color[1]; label.color[2] = p_color[2]; @@ -1629,16 +1665,26 @@ void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_objec if (!enabled_debug_utils) { return; } + CharString obj_data = p_object_name.utf8(); VkDebugUtilsObjectNameInfoEXT name_info; name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; name_info.pNext = nullptr; name_info.objectType = p_object_type; name_info.objectHandle = p_object_handle; - CharString object_name = p_object_name.utf8(); - name_info.pObjectName = object_name.get_data(); + name_info.pObjectName = obj_data.get_data(); SetDebugUtilsObjectNameEXT(device, &name_info); } +String VulkanContext::get_device_vendor_name() const { + return device_vendor; +} +String VulkanContext::get_device_name() const { + return device_name; +} +String VulkanContext::get_device_pipeline_cache_uuid() const { + return pipeline_cache_id; +} + VulkanContext::VulkanContext() { use_validation_layers = Engine::get_singleton()->is_validation_layers_enabled(); diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index 5cb762aca8..dc6b0410bc 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -57,6 +57,11 @@ class VulkanContext { bool device_initialized = false; bool inst_initialized = false; + String device_vendor; + String device_name; + String pipeline_cache_id; + uint32_t device_api_version = 0; + bool buffers_prepared = false; // 
Present queue. @@ -215,6 +220,10 @@ public: void command_end_label(VkCommandBuffer p_command_buffer); void set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name); + String get_device_vendor_name() const; + String get_device_name() const; + String get_device_pipeline_cache_uuid() const; + VulkanContext(); virtual ~VulkanContext(); }; diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp index 040f1b1640..208f4b954a 100644 --- a/editor/editor_node.cpp +++ b/editor/editor_node.cpp @@ -6265,7 +6265,7 @@ EditorNode::EditorNode() { p = help_menu->get_popup(); p->connect("id_pressed", callable_mp(this, &EditorNode::_menu_option)); - p->add_icon_shortcut(gui_base->get_theme_icon("HelpSearch", "EditorIcons"), ED_SHORTCUT("editor/editor_help", TTR("Search"), KEY_MASK_SHIFT | KEY_F1), HELP_SEARCH); + p->add_icon_shortcut(gui_base->get_theme_icon("HelpSearch", "EditorIcons"), ED_SHORTCUT("editor/editor_help", TTR("Search")), HELP_SEARCH); p->add_separator(); p->add_icon_shortcut(gui_base->get_theme_icon("Instance", "EditorIcons"), ED_SHORTCUT("editor/online_docs", TTR("Online Docs")), HELP_DOCS); p->add_icon_shortcut(gui_base->get_theme_icon("Instance", "EditorIcons"), ED_SHORTCUT("editor/q&a", TTR("Q&A")), HELP_QA); diff --git a/editor/editor_settings.cpp b/editor/editor_settings.cpp index 7a602912c9..9908f5727e 100644 --- a/editor/editor_settings.cpp +++ b/editor/editor_settings.cpp @@ -959,27 +959,16 @@ void EditorSettings::create() { _create_script_templates(dir->get_current_dir().plus_file("script_templates")); - if (dir->change_dir("projects") != OK) { - dir->make_dir("projects"); - } else { - dir->change_dir(".."); - } - - // Validate/create project-specific config dir - - dir->change_dir("projects"); - String project_config_dir = ProjectSettings::get_singleton()->get_resource_path(); - if (project_config_dir.ends_with("/")) { - project_config_dir = config_path.substr(0, project_config_dir.size() - 1); - } - project_config_dir 
= project_config_dir.get_file() + "-" + project_config_dir.md5_text(); - - if (dir->change_dir(project_config_dir) != OK) { - dir->make_dir(project_config_dir); - } else { - dir->change_dir(".."); + { + // Validate/create project-specific editor settings dir. + DirAccessRef da = DirAccess::create(DirAccess::ACCESS_RESOURCES); + if (da->change_dir(EditorSettings::PROJECT_EDITOR_SETTINGS_PATH) != OK) { + Error err = da->make_dir_recursive(EditorSettings::PROJECT_EDITOR_SETTINGS_PATH); + if (err || da->change_dir(EditorSettings::PROJECT_EDITOR_SETTINGS_PATH) != OK) { + ERR_FAIL_MSG("Failed to create '" + EditorSettings::PROJECT_EDITOR_SETTINGS_PATH + "' folder."); + } + } } - dir->change_dir(".."); // Validate editor config file @@ -1001,7 +990,6 @@ void EditorSettings::create() { singleton->save_changed_setting = true; singleton->config_file_path = config_file_path; - singleton->project_config_dir = project_config_dir; singleton->settings_dir = config_dir; singleton->data_dir = data_dir; singleton->cache_dir = cache_dir; @@ -1277,7 +1265,7 @@ String EditorSettings::get_settings_dir() const { } String EditorSettings::get_project_settings_dir() const { - return get_settings_dir().plus_file("projects").plus_file(project_config_dir); + return EditorSettings::PROJECT_EDITOR_SETTINGS_PATH; } String EditorSettings::get_text_editor_themes_dir() const { diff --git a/editor/editor_settings.h b/editor/editor_settings.h index 61ec8546aa..616a938a86 100644 --- a/editor/editor_settings.h +++ b/editor/editor_settings.h @@ -46,6 +46,7 @@ class EditorSettings : public Resource { _THREAD_SAFE_CLASS_ public: + inline static const String PROJECT_EDITOR_SETTINGS_PATH = "res://.godot/editor"; struct Plugin { EditorPlugin *instance = nullptr; String path; diff --git a/editor/plugins/node_3d_editor_plugin.cpp b/editor/plugins/node_3d_editor_plugin.cpp index 0c005e0c23..a3009731f9 100644 --- a/editor/plugins/node_3d_editor_plugin.cpp +++ b/editor/plugins/node_3d_editor_plugin.cpp @@ -2831,7 
+2831,7 @@ void Node3DEditorViewport::_menu_option(int p_option) { } break; case VIEW_FRONT: { cursor.x_rot = 0; - cursor.y_rot = 0; + cursor.y_rot = Math_PI; set_message(TTR("Front View."), 2); name = TTR("Front"); _set_auto_orthogonal(); @@ -2840,7 +2840,7 @@ void Node3DEditorViewport::_menu_option(int p_option) { } break; case VIEW_REAR: { cursor.x_rot = 0; - cursor.y_rot = Math_PI; + cursor.y_rot = 0; set_message(TTR("Rear View."), 2); name = TTR("Rear"); _set_auto_orthogonal(); diff --git a/editor/plugins/texture_region_editor_plugin.cpp b/editor/plugins/texture_region_editor_plugin.cpp index 61e0cc281d..36348f7753 100644 --- a/editor/plugins/texture_region_editor_plugin.cpp +++ b/editor/plugins/texture_region_editor_plugin.cpp @@ -480,20 +480,41 @@ void TextureRegionEditor::_region_input(const Ref<InputEvent> &p_input) { Vector2 dragged(mm->get_relative().x / draw_zoom, mm->get_relative().y / draw_zoom); hscroll->set_value(hscroll->get_value() - dragged.x); vscroll->set_value(vscroll->get_value() - dragged.y); - } else if (drag) { if (edited_margin >= 0) { float new_margin = 0; - if (edited_margin == 0) { - new_margin = prev_margin + (mm->get_position().y - drag_from.y) / draw_zoom; - } else if (edited_margin == 1) { - new_margin = prev_margin - (mm->get_position().y - drag_from.y) / draw_zoom; - } else if (edited_margin == 2) { - new_margin = prev_margin + (mm->get_position().x - drag_from.x) / draw_zoom; - } else if (edited_margin == 3) { - new_margin = prev_margin - (mm->get_position().x - drag_from.x) / draw_zoom; + + if (snap_mode != SNAP_GRID) { + if (edited_margin == 0) { + new_margin = prev_margin + (mm->get_position().y - drag_from.y) / draw_zoom; + } else if (edited_margin == 1) { + new_margin = prev_margin - (mm->get_position().y - drag_from.y) / draw_zoom; + } else if (edited_margin == 2) { + new_margin = prev_margin + (mm->get_position().x - drag_from.x) / draw_zoom; + } else if (edited_margin == 3) { + new_margin = prev_margin - 
(mm->get_position().x - drag_from.x) / draw_zoom; + } else { + ERR_PRINT("Unexpected edited_margin"); + } + + if (snap_mode == SNAP_PIXEL) { + new_margin = Math::round(new_margin); + } } else { - ERR_PRINT("Unexpected edited_margin"); + Vector2 pos_snapped = snap_point(mtx.affine_inverse().xform(mm->get_position())); + Rect2 rect_rounded = Rect2(rect.position.round(), rect.size.round()); + + if (edited_margin == 0) { + new_margin = pos_snapped.y - rect_rounded.position.y; + } else if (edited_margin == 1) { + new_margin = rect_rounded.size.y + rect_rounded.position.y - pos_snapped.y; + } else if (edited_margin == 2) { + new_margin = pos_snapped.x - rect_rounded.position.x; + } else if (edited_margin == 3) { + new_margin = rect_rounded.size.x + rect_rounded.position.x - pos_snapped.x; + } else { + ERR_PRINT("Unexpected edited_margin"); + } } if (new_margin < 0) { diff --git a/editor/plugins/tile_set_editor_plugin.cpp b/editor/plugins/tile_set_editor_plugin.cpp index deeab2fbc7..5ac7fe262f 100644 --- a/editor/plugins/tile_set_editor_plugin.cpp +++ b/editor/plugins/tile_set_editor_plugin.cpp @@ -2173,7 +2173,7 @@ Array TileSetEditor::_get_tiles_in_current_texture(bool sorted) { } } if (sorted) { - a.sort_custom(this, "_sort_tiles"); + a.sort_custom(callable_mp(this, &TileSetEditor::_sort_tiles)); } return a; } diff --git a/editor/project_manager.cpp b/editor/project_manager.cpp index 5951373af9..afbed0c610 100644 --- a/editor/project_manager.cpp +++ b/editor/project_manager.cpp @@ -2281,6 +2281,11 @@ void ProjectManager::_install_project(const String &p_zip_path, const String &p_ } void ProjectManager::_files_dropped(PackedStringArray p_files, int p_screen) { + if (p_files.size() == 1 && p_files[0].ends_with(".zip")) { + const String file = p_files[0].get_file(); + _install_project(p_files[0], file.substr(0, file.length() - 4).capitalize()); + return; + } Set<String> folders_set; DirAccess *da = DirAccess::create(DirAccess::ACCESS_FILESYSTEM); for (int i = 0; i < 
p_files.size(); i++) { diff --git a/main/main.cpp b/main/main.cpp index 657a6ad822..d70f0eb291 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -1634,7 +1634,7 @@ Error Main::setup2(Thread::ID p_main_tid_override) { register_server_types(); - MAIN_PRINT("Main: Load Remaps"); + MAIN_PRINT("Main: Load Boot Image"); Color clear = GLOBAL_DEF("rendering/environment/default_clear_color", Color(0.3, 0.3, 0.3)); RenderingServer::get_singleton()->set_default_clear_color(clear); @@ -1690,7 +1690,6 @@ Error Main::setup2(Thread::ID p_main_tid_override) { MAIN_PRINT("Main: DCC"); RenderingServer::get_singleton()->set_default_clear_color( GLOBAL_DEF("rendering/environment/default_clear_color", Color(0.3, 0.3, 0.3))); - MAIN_PRINT("Main: END"); GLOBAL_DEF("application/config/icon", String()); ProjectSettings::get_singleton()->set_custom_property_info("application/config/icon", @@ -1728,7 +1727,16 @@ Error Main::setup2(Thread::ID p_main_tid_override) { id->set_emulate_mouse_from_touch(bool(GLOBAL_DEF("input_devices/pointing/emulate_mouse_from_touch", true))); } - MAIN_PRINT("Main: Load Remaps"); + MAIN_PRINT("Main: Load Translations and Remaps"); + + translation_server->setup(); //register translations, load them, etc. + if (locale != "") { + translation_server->set_locale(locale); + } + translation_server->load_translations(); + ResourceLoader::load_translation_remaps(); //load remaps for resources + + ResourceLoader::load_path_remaps(); MAIN_PRINT("Main: Load Scene Types"); @@ -1774,17 +1782,6 @@ Error Main::setup2(Thread::ID p_main_tid_override) { // This loads global classes, so it must happen before custom loaders and savers are registered ScriptServer::init_languages(); - MAIN_PRINT("Main: Load Translations"); - - translation_server->setup(); //register translations, load them, etc. 
- if (locale != "") { - translation_server->set_locale(locale); - } - translation_server->load_translations(); - ResourceLoader::load_translation_remaps(); //load remaps for resources - - ResourceLoader::load_path_remaps(); - audio_server->load_default_bus_layout(); if (use_debug_profiler && EngineDebugger::is_active()) { diff --git a/misc/dist/html/editor.html b/misc/dist/html/editor.html index 53ad826730..540ab94e51 100644 --- a/misc/dist/html/editor.html +++ b/misc/dist/html/editor.html @@ -326,7 +326,7 @@ function startEditor(zip) { const INDETERMINATE_STATUS_STEP_MS = 100; - const persistentPaths = ['/home/web_user/.config', '/home/web_user/.cache', '/home/web_user/projects']; + const persistentPaths = ['/home/web_user/']; var editorCanvas = document.getElementById('editor-canvas'); var gameCanvas = document.getElementById('game-canvas'); @@ -493,11 +493,11 @@ engine.setUnloadAfterInit(false); // Don't want to reload when starting game. engine.init('godot.tools').then(function() { if (zip) { - engine.copyToFS("/home/web_user/preload.zip", zip); + engine.copyToFS("/tmp/preload.zip", zip); } try { // Avoid user creating project in the persistent root folder. 
- engine.copyToFS("/home/web_user/projects/keep", new Uint8Array()); + engine.copyToFS("/home/web_user/keep", new Uint8Array()); } catch(e) { // File exists } diff --git a/modules/gdnative/include/gdnative/math_defs.h b/modules/gdnative/include/gdnative/math_defs.h index 05de157dd0..b5cf389506 100644 --- a/modules/gdnative/include/gdnative/math_defs.h +++ b/modules/gdnative/include/gdnative/math_defs.h @@ -35,6 +35,7 @@ extern "C" { #endif +#include <stdbool.h> #include <stdint.h> ////// bool diff --git a/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs b/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs index 5ef55fea49..774c49e705 100644 --- a/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs +++ b/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs @@ -86,7 +86,7 @@ namespace GodotTools.Build { case BuildTool.DotnetCli: { - string dotnetCliPath = OS.PathWhich("dotnet"); + string dotnetCliPath = FindBuildEngineOnUnix("dotnet"); if (!string.IsNullOrEmpty(dotnetCliPath)) return (dotnetCliPath, BuildTool.DotnetCli); GD.PushError($"Cannot find executable for '{BuildManager.PropNameDotnetCli}'. 
Fallback to MSBuild from Mono."); @@ -122,7 +122,11 @@ namespace GodotTools.Build if (OS.IsMacOS) { result.Add("/Library/Frameworks/Mono.framework/Versions/Current/bin/"); + result.Add("/opt/local/bin/"); result.Add("/usr/local/var/homebrew/linked/mono/bin/"); + result.Add("/usr/local/bin/"); + result.Add("/usr/local/bin/dotnet/"); + result.Add("/usr/local/share/dotnet/"); } result.Add("/opt/novell/mono/bin/"); diff --git a/modules/tga/image_loader_tga.cpp b/modules/tga/image_loader_tga.cpp index 2da9159228..ef53661557 100644 --- a/modules/tga/image_loader_tga.cpp +++ b/modules/tga/image_loader_tga.cpp @@ -56,6 +56,10 @@ Error ImageLoaderTGA::decode_tga_rle(const uint8_t *p_compressed_buffer, size_t compressed_pos += 1; count = (c & 0x7f) + 1; + if (output_pos + count * p_pixel_size > output_pos) { + return ERR_PARSE_ERROR; + } + if (c & 0x80) { for (size_t i = 0; i < p_pixel_size; i++) { pixels_w[i] = p_compressed_buffer[compressed_pos]; @@ -79,7 +83,7 @@ Error ImageLoaderTGA::decode_tga_rle(const uint8_t *p_compressed_buffer, size_t return OK; } -Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s &p_header, const uint8_t *p_palette, const bool p_is_monochrome) { +Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s &p_header, const uint8_t *p_palette, const bool p_is_monochrome, size_t p_output_size) { #define TGA_PUT_PIXEL(r, g, b, a) \ int image_data_ofs = ((y * width) + x); \ image_data_w[image_data_ofs * 4 + 0] = r; \ @@ -130,6 +134,9 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff if (p_is_monochrome) { while (y != y_end) { while (x != x_end) { + if (i > p_output_size) { + return ERR_PARSE_ERROR; + } uint8_t shade = p_buffer[i]; TGA_PUT_PIXEL(shade, shade, shade, 0xff) @@ -143,6 +150,9 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff } else { while (y != y_end) { while (x != x_end) { + if 
(i > p_output_size) { + return ERR_PARSE_ERROR; + } uint8_t index = p_buffer[i]; uint8_t r = 0x00; uint8_t g = 0x00; @@ -171,6 +181,10 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff } else if (p_header.pixel_depth == 24) { while (y != y_end) { while (x != x_end) { + if (i + 2 > p_output_size) { + return ERR_PARSE_ERROR; + } + uint8_t r = p_buffer[i + 2]; uint8_t g = p_buffer[i + 1]; uint8_t b = p_buffer[i + 0]; @@ -186,6 +200,10 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff } else if (p_header.pixel_depth == 32) { while (y != y_end) { while (x != x_end) { + if (i + 3 > p_output_size) { + return ERR_PARSE_ERROR; + } + uint8_t a = p_buffer[i + 3]; uint8_t r = p_buffer[i + 2]; uint8_t g = p_buffer[i + 1]; @@ -279,7 +297,7 @@ Error ImageLoaderTGA::load_image(Ref<Image> p_image, FileAccess *f, bool p_force const uint8_t *src_image_r = src_image.ptr(); const size_t pixel_size = tga_header.pixel_depth >> 3; - const size_t buffer_size = (tga_header.image_width * tga_header.image_height) * pixel_size; + size_t buffer_size = (tga_header.image_width * tga_header.image_height) * pixel_size; Vector<uint8_t> uncompressed_buffer; uncompressed_buffer.resize(buffer_size); @@ -297,11 +315,12 @@ Error ImageLoaderTGA::load_image(Ref<Image> p_image, FileAccess *f, bool p_force } } else { buffer = src_image_r; + buffer_size = src_image_len; }; if (err == OK) { const uint8_t *palette_r = palette.ptr(); - err = convert_to_image(p_image, buffer, tga_header, palette_r, is_monochrome); + err = convert_to_image(p_image, buffer, tga_header, palette_r, is_monochrome, buffer_size); } } diff --git a/modules/tga/image_loader_tga.h b/modules/tga/image_loader_tga.h index 249e33411e..bbfc3fed32 100644 --- a/modules/tga/image_loader_tga.h +++ b/modules/tga/image_loader_tga.h @@ -73,7 +73,7 @@ class ImageLoaderTGA : public ImageFormatLoader { uint8_t image_descriptor; }; static Error decode_tga_rle(const uint8_t 
*p_compressed_buffer, size_t p_pixel_size, uint8_t *p_uncompressed_buffer, size_t p_output_size); - static Error convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s &p_header, const uint8_t *p_palette, const bool p_is_monochrome); + static Error convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s &p_header, const uint8_t *p_palette, const bool p_is_monochrome, size_t p_output_size); public: virtual Error load_image(Ref<Image> p_image, FileAccess *f, bool p_force_linear, float p_scale); diff --git a/platform/iphone/export/export.cpp b/platform/iphone/export/export.cpp index 604ad4e04b..91cecdd704 100644 --- a/platform/iphone/export/export.cpp +++ b/platform/iphone/export/export.cpp @@ -1351,6 +1351,8 @@ Error EditorExportPlatformIOS::_export_ios_plugins(const Ref<EditorExportPreset> Vector<String> added_embedded_dependenciy_names; HashMap<String, String> plist_values; + Set<String> plugin_linker_flags; + Error err; for (int i = 0; i < enabled_plugins.size(); i++) { @@ -1417,6 +1419,13 @@ Error EditorExportPlatformIOS::_export_ios_plugins(const Ref<EditorExportPreset> p_config_data.capabilities.push_back(capability); } + // Linker flags + // Checking duplicates + for (int j = 0; j < plugin.linker_flags.size(); j++) { + String linker_flag = plugin.linker_flags[j]; + plugin_linker_flags.insert(linker_flag); + } + // Plist // Using hash map container to remove duplicates const String *K = nullptr; @@ -1497,6 +1506,27 @@ Error EditorExportPlatformIOS::_export_ios_plugins(const Ref<EditorExportPreset> p_config_data.cpp_code += plugin_cpp_code.format(plugin_format, "$_"); } + + // Update Linker Flag Values + { + String result_linker_flags = " "; + for (Set<String>::Element *E = plugin_linker_flags.front(); E; E = E->next()) { + const String &flag = E->get(); + + if (flag.length() == 0) { + continue; + } + + if (result_linker_flags.length() > 0) { + result_linker_flags += ' '; + } + + result_linker_flags += flag; + } 
+ result_linker_flags = result_linker_flags.replace("\"", "\\\""); + p_config_data.linker_flags += result_linker_flags; + } + return OK; } diff --git a/platform/iphone/godot_app_delegate.h b/platform/iphone/godot_app_delegate.h index 76d8aa409f..6335ada50e 100644 --- a/platform/iphone/godot_app_delegate.h +++ b/platform/iphone/godot_app_delegate.h @@ -31,7 +31,6 @@ #import <UIKit/UIKit.h> typedef NSObject<UIApplicationDelegate> ApplicationDelegateService; -typedef void (^APNSNotification)(UIBackgroundFetchResult); @interface GodotApplicalitionDelegate : NSObject <UIApplicationDelegate> @@ -39,27 +38,4 @@ typedef void (^APNSNotification)(UIBackgroundFetchResult); + (void)addService:(ApplicationDelegateService *)service; -- (void)godot:(UIApplication *)application receivedNotificationToken:(NSData *)deviceToken; -- (void)godot:(UIApplication *)application receivedNotificationError:(NSError *)error; -- (void)godot:(UIApplication *)application receivedNotification:(NSDictionary *)userInfo completion:(APNSNotification)completionHandler; - @end - -#define GODOT_ENABLE_PUSH_NOTIFICATIONS \ - @interface GodotApplicalitionDelegate (PushNotifications) \ - @end \ - @implementation GodotApplicalitionDelegate (PushNotifications) \ - -(void)application : (UIApplication *)application \ - didRegisterForRemoteNotificationsWithDeviceToken : (NSData *)deviceToken { \ - [self godot:application receivedNotificationToken:deviceToken]; \ - } \ - -(void)application : (UIApplication *)application \ - didFailToRegisterForRemoteNotificationsWithError : (NSError *)error { \ - [self godot:application receivedNotificationError:error]; \ - } \ - -(void)application : (UIApplication *)application \ - didReceiveRemoteNotification : (NSDictionary *)userInfo \ - fetchCompletionHandler : (APNSNotification)completionHandler { \ - [self godot:application receivedNotification:userInfo completion:completionHandler]; \ - } \ - @end diff --git a/platform/iphone/godot_app_delegate.m 
b/platform/iphone/godot_app_delegate.m index 9d298162f3..3ce9bffc79 100644 --- a/platform/iphone/godot_app_delegate.m +++ b/platform/iphone/godot_app_delegate.m @@ -302,37 +302,7 @@ static NSMutableArray<ApplicationDelegateService *> *services = nil; // MARK: Remote Notification -- (void)godot:(UIApplication *)application receivedNotificationToken:(NSData *)deviceToken { - for (ApplicationDelegateService *service in services) { - if (![service respondsToSelector:_cmd]) { - continue; - } - - [service application:application didRegisterForRemoteNotificationsWithDeviceToken:deviceToken]; - } -} - -- (void)godot:(UIApplication *)application receivedNotificationError:(NSError *)error { - for (ApplicationDelegateService *service in services) { - if (![service respondsToSelector:_cmd]) { - continue; - } - - [service application:application didFailToRegisterForRemoteNotificationsWithError:error]; - } -} - -- (void)godot:(UIApplication *)application receivedNotification:(NSDictionary *)userInfo completion:(APNSNotification)completionHandler { - for (ApplicationDelegateService *service in services) { - if (![service respondsToSelector:_cmd]) { - continue; - } - - [service application:application didReceiveRemoteNotification:userInfo fetchCompletionHandler:completionHandler]; - } - - completionHandler(UIBackgroundFetchResultNoData); -} +// Moved to the iOS Plugin // MARK: User Activity and Handling Quick Actions diff --git a/platform/iphone/godot_view.h b/platform/iphone/godot_view.h index 29960c47a8..265f826173 100644 --- a/platform/iphone/godot_view.h +++ b/platform/iphone/godot_view.h @@ -32,12 +32,20 @@ class String; +@class GodotView; @protocol DisplayLayer; @protocol GodotViewRendererProtocol; +@protocol GodotViewDelegate + +- (BOOL)godotViewFinishedSetup:(GodotView *)view; + +@end + @interface GodotView : UIView @property(assign, nonatomic) id<GodotViewRendererProtocol> renderer; +@property(assign, nonatomic) id<GodotViewDelegate> delegate; @property(assign, readonly, 
nonatomic) BOOL isActive; diff --git a/platform/iphone/godot_view.mm b/platform/iphone/godot_view.mm index bf073ae295..887297848e 100644 --- a/platform/iphone/godot_view.mm +++ b/platform/iphone/godot_view.mm @@ -120,6 +120,7 @@ static const int max_touches = 8; [self stopRendering]; self.renderer = nil; + self.delegate = nil; if (self.renderingLayer) { [self.renderingLayer removeFromSuperlayer]; @@ -241,6 +242,14 @@ static const int max_touches = 8; return; } + if (self.delegate) { + BOOL delegateFinishedSetup = [self.delegate godotViewFinishedSetup:self]; + + if (!delegateFinishedSetup) { + return; + } + } + [self handleMotion]; [self.renderer renderOnView:self]; } diff --git a/platform/iphone/plugin/godot_plugin_config.h b/platform/iphone/plugin/godot_plugin_config.h index 72fab13600..f4e30c8349 100644 --- a/platform/iphone/plugin/godot_plugin_config.h +++ b/platform/iphone/plugin/godot_plugin_config.h @@ -66,6 +66,7 @@ struct PluginConfigIOS { inline static const char *DEPENDENCIES_SYSTEM_KEY = "system"; inline static const char *DEPENDENCIES_CAPABILITIES_KEY = "capabilities"; inline static const char *DEPENDENCIES_FILES_KEY = "files"; + inline static const char *DEPENDENCIES_LINKER_FLAGS = "linker_flags"; inline static const char *PLIST_SECTION = "plist"; @@ -89,6 +90,8 @@ struct PluginConfigIOS { Vector<String> files_to_copy; Vector<String> capabilities; + Vector<String> linker_flags; + // Optional plist section // Supports only string types for now HashMap<String, String> plist; @@ -260,6 +263,8 @@ static inline PluginConfigIOS load_plugin_config(Ref<ConfigFile> config_file, co plugin_config.files_to_copy = resolve_local_dependencies(config_base_dir, files); plugin_config.capabilities = config_file->get_value(PluginConfigIOS::DEPENDENCIES_SECTION, PluginConfigIOS::DEPENDENCIES_CAPABILITIES_KEY, Vector<String>()); + + plugin_config.linker_flags = config_file->get_value(PluginConfigIOS::DEPENDENCIES_SECTION, PluginConfigIOS::DEPENDENCIES_LINKER_FLAGS, 
Vector<String>()); } if (config_file->has_section(PluginConfigIOS::PLIST_SECTION)) { diff --git a/platform/iphone/view_controller.mm b/platform/iphone/view_controller.mm index c41aa13bb7..6cef244567 100644 --- a/platform/iphone/view_controller.mm +++ b/platform/iphone/view_controller.mm @@ -40,12 +40,14 @@ #import <AVFoundation/AVFoundation.h> #import <GameController/GameController.h> -@interface ViewController () +@interface ViewController () <GodotViewDelegate> @property(strong, nonatomic) GodotViewRenderer *renderer; @property(strong, nonatomic) GodotNativeVideoView *videoView; @property(strong, nonatomic) GodotKeyboardInputView *keyboardView; +@property(strong, nonatomic) UIView *godotLoadingOverlay; + @end @implementation ViewController @@ -62,6 +64,7 @@ self.view = view; view.renderer = self.renderer; + view.delegate = self; } - (instancetype)initWithNibName:(NSString *)nibNameOrNil bundle:(NSBundle *)nibBundleOrNil { @@ -97,6 +100,7 @@ [super viewDidLoad]; [self observeKeyboard]; + [self displayLoadingOverlay]; if (@available(iOS 11.0, *)) { [self setNeedsUpdateOfScreenEdgesDeferringSystemGestures]; @@ -121,6 +125,31 @@ object:nil]; } +- (void)displayLoadingOverlay { + NSBundle *bundle = [NSBundle mainBundle]; + NSString *storyboardName = @"Launch Screen"; + + if ([bundle pathForResource:storyboardName ofType:@"storyboardc"] == nil) { + return; + } + + UIStoryboard *launchStoryboard = [UIStoryboard storyboardWithName:storyboardName bundle:bundle]; + + UIViewController *controller = [launchStoryboard instantiateInitialViewController]; + self.godotLoadingOverlay = controller.view; + self.godotLoadingOverlay.frame = self.view.bounds; + self.godotLoadingOverlay.autoresizingMask = UIViewAutoresizingFlexibleHeight | UIViewAutoresizingFlexibleWidth; + + [self.view addSubview:self.godotLoadingOverlay]; +} + +- (BOOL)godotViewFinishedSetup:(GodotView *)view { + [self.godotLoadingOverlay removeFromSuperview]; + self.godotLoadingOverlay = nil; + + return YES; +} + - 
(void)dealloc { [self.videoView stopVideo]; @@ -130,6 +159,11 @@ self.renderer = nil; + if (self.godotLoadingOverlay) { + [self.godotLoadingOverlay removeFromSuperview]; + self.godotLoadingOverlay = nil; + } + [[NSNotificationCenter defaultCenter] removeObserver:self]; } diff --git a/platform/javascript/display_server_javascript.cpp b/platform/javascript/display_server_javascript.cpp index 915e8eeacf..cfe093693f 100644 --- a/platform/javascript/display_server_javascript.cpp +++ b/platform/javascript/display_server_javascript.cpp @@ -93,7 +93,7 @@ EM_BOOL DisplayServerJavaScript::fullscreen_change_callback(int p_event_type, co DisplayServerJavaScript *display = get_singleton(); // Empty ID is canvas. String target_id = String::utf8(p_event->id); - if (target_id.is_empty() || target_id == String::utf8(display->canvas_id)) { + if (target_id.is_empty() || target_id == String::utf8(&(display->canvas_id[1]))) { // This event property is the only reliable data on // browser fullscreen state. if (p_event->isFullscreen) { @@ -455,7 +455,7 @@ DisplayServer::MouseMode DisplayServerJavaScript::mouse_get_mode() const { EmscriptenPointerlockChangeEvent ev; emscripten_get_pointerlock_status(&ev); - return (ev.isActive && String::utf8(ev.id) == String::utf8(canvas_id)) ? MOUSE_MODE_CAPTURED : MOUSE_MODE_VISIBLE; + return (ev.isActive && String::utf8(ev.id) == String::utf8(&canvas_id[1])) ? 
MOUSE_MODE_CAPTURED : MOUSE_MODE_VISIBLE; } // Wheel diff --git a/platform/javascript/javascript_main.cpp b/platform/javascript/javascript_main.cpp index 0b8af70b13..0fe95b0a8f 100644 --- a/platform/javascript/javascript_main.cpp +++ b/platform/javascript/javascript_main.cpp @@ -88,6 +88,13 @@ extern EMSCRIPTEN_KEEPALIVE int godot_js_main(int argc, char *argv[]) { Main::start(); os->get_main_loop()->initialize(); +#ifdef TOOLS_ENABLED + if (Main::is_project_manager() && FileAccess::exists("/tmp/preload.zip")) { + PackedStringArray ps; + ps.push_back("/tmp/preload.zip"); + os->get_main_loop()->emit_signal("files_dropped", ps, -1); + } +#endif emscripten_set_main_loop(main_loop_callback, -1, false); // Immediately run the first iteration. // We are inside an animation frame, we want to immediately draw on the newly setup canvas. diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py index c093454b0a..2141f68725 100644 --- a/platform/linuxbsd/detect.py +++ b/platform/linuxbsd/detect.py @@ -72,7 +72,7 @@ def get_opts(): BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN))", False), BoolVariable("pulseaudio", "Detect and use PulseAudio", True), BoolVariable("udev", "Use udev for gamepad connection callbacks", True), - EnumVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", "yes", ("yes", "no")), + BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True), BoolVariable("separate_debug_symbols", "Create a separate file containing debugging symbols", False), BoolVariable("touch", "Enable touch events", True), BoolVariable("execinfo", "Use libexecinfo on systems where glibc is not available", False), diff --git a/platform/osx/detect.py b/platform/osx/detect.py index 466f68d269..47ac609917 100644 --- a/platform/osx/detect.py +++ b/platform/osx/detect.py @@ -31,7 +31,7 @@ def get_opts(): False, ), EnumVariable("macports_clang", "Build using Clang from MacPorts", "no", ("no", 
"5.0", "devel")), - EnumVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", "yes", ("yes", "no")), + BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True), BoolVariable("separate_debug_symbols", "Create a separate file containing debugging symbols", False), BoolVariable("use_ubsan", "Use LLVM/GCC compiler undefined behavior sanitizer (UBSAN)", False), BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN))", False), diff --git a/platform/server/detect.py b/platform/server/detect.py index db503584d3..06042c8e17 100644 --- a/platform/server/detect.py +++ b/platform/server/detect.py @@ -32,13 +32,13 @@ def get_opts(): return [ BoolVariable("use_llvm", "Use the LLVM compiler", False), - BoolVariable("use_static_cpp", "Link libgcc and libstdc++ statically for better portability", False), + BoolVariable("use_static_cpp", "Link libgcc and libstdc++ statically for better portability", True), BoolVariable("use_coverage", "Test Godot coverage", False), BoolVariable("use_ubsan", "Use LLVM/GCC compiler undefined behavior sanitizer (UBSAN)", False), BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN))", False), BoolVariable("use_lsan", "Use LLVM/GCC compiler leak sanitizer (LSAN))", False), BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN))", False), - EnumVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", "yes", ("yes", "no")), + BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True), BoolVariable("separate_debug_symbols", "Create a separate file containing debugging symbols", False), BoolVariable("execinfo", "Use libexecinfo on systems where glibc is not available", False), ] diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 5216fca2ca..a675a2302f 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -64,7 +64,7 @@ def get_opts(): # 
XP support dropped after EOL due to missing API for IPv6 and other issues # Vista support dropped after EOL due to GH-10243 ("target_win_version", "Targeted Windows version, >= 0x0601 (Windows 7)", "0x0601"), - EnumVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", "yes", ("yes", "no")), + BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True), EnumVariable("windows_subsystem", "Windows subsystem", "default", ("default", "console", "gui")), BoolVariable("separate_debug_symbols", "Create a separate file containing debugging symbols", False), ("msvc_version", "MSVC version to use. Ignored if VCINSTALLDIR is set in shell env.", None), diff --git a/scene/2d/cpu_particles_2d.cpp b/scene/2d/cpu_particles_2d.cpp index a19347caa8..f839e8c304 100644 --- a/scene/2d/cpu_particles_2d.cpp +++ b/scene/2d/cpu_particles_2d.cpp @@ -671,6 +671,8 @@ void CPUParticles2D::_particles_process(float p_delta) { restart = true; } + float tv = 0.0; + if (restart) { if (!emitting) { p.active = false; @@ -685,12 +687,12 @@ void CPUParticles2D::_particles_process(float p_delta) { float tex_angle = 0.0; if (curve_parameters[PARAM_ANGLE].is_valid()) { - tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(0); + tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(tv); } float tex_anim_offset = 0.0; if (curve_parameters[PARAM_ANGLE].is_valid()) { - tex_anim_offset = curve_parameters[PARAM_ANGLE]->interpolate(0); + tex_anim_offset = curve_parameters[PARAM_ANGLE]->interpolate(tv); } p.seed = Math::rand(); @@ -765,59 +767,61 @@ void CPUParticles2D::_particles_process(float p_delta) { continue; } else if (p.time > p.lifetime) { p.active = false; + tv = 1.0; } else { uint32_t alt_seed = p.seed; p.time += local_delta; p.custom[1] = p.time / lifetime; + tv = p.time / p.lifetime; float tex_linear_velocity = 0.0; if (curve_parameters[PARAM_INITIAL_LINEAR_VELOCITY].is_valid()) { - tex_linear_velocity = 
curve_parameters[PARAM_INITIAL_LINEAR_VELOCITY]->interpolate(p.custom[1]); + tex_linear_velocity = curve_parameters[PARAM_INITIAL_LINEAR_VELOCITY]->interpolate(tv); } float tex_orbit_velocity = 0.0; if (curve_parameters[PARAM_ORBIT_VELOCITY].is_valid()) { - tex_orbit_velocity = curve_parameters[PARAM_ORBIT_VELOCITY]->interpolate(p.custom[1]); + tex_orbit_velocity = curve_parameters[PARAM_ORBIT_VELOCITY]->interpolate(tv); } float tex_angular_velocity = 0.0; if (curve_parameters[PARAM_ANGULAR_VELOCITY].is_valid()) { - tex_angular_velocity = curve_parameters[PARAM_ANGULAR_VELOCITY]->interpolate(p.custom[1]); + tex_angular_velocity = curve_parameters[PARAM_ANGULAR_VELOCITY]->interpolate(tv); } float tex_linear_accel = 0.0; if (curve_parameters[PARAM_LINEAR_ACCEL].is_valid()) { - tex_linear_accel = curve_parameters[PARAM_LINEAR_ACCEL]->interpolate(p.custom[1]); + tex_linear_accel = curve_parameters[PARAM_LINEAR_ACCEL]->interpolate(tv); } float tex_tangential_accel = 0.0; if (curve_parameters[PARAM_TANGENTIAL_ACCEL].is_valid()) { - tex_tangential_accel = curve_parameters[PARAM_TANGENTIAL_ACCEL]->interpolate(p.custom[1]); + tex_tangential_accel = curve_parameters[PARAM_TANGENTIAL_ACCEL]->interpolate(tv); } float tex_radial_accel = 0.0; if (curve_parameters[PARAM_RADIAL_ACCEL].is_valid()) { - tex_radial_accel = curve_parameters[PARAM_RADIAL_ACCEL]->interpolate(p.custom[1]); + tex_radial_accel = curve_parameters[PARAM_RADIAL_ACCEL]->interpolate(tv); } float tex_damping = 0.0; if (curve_parameters[PARAM_DAMPING].is_valid()) { - tex_damping = curve_parameters[PARAM_DAMPING]->interpolate(p.custom[1]); + tex_damping = curve_parameters[PARAM_DAMPING]->interpolate(tv); } float tex_angle = 0.0; if (curve_parameters[PARAM_ANGLE].is_valid()) { - tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(p.custom[1]); + tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(tv); } float tex_anim_speed = 0.0; if (curve_parameters[PARAM_ANIM_SPEED].is_valid()) { - tex_anim_speed = 
curve_parameters[PARAM_ANIM_SPEED]->interpolate(p.custom[1]); + tex_anim_speed = curve_parameters[PARAM_ANIM_SPEED]->interpolate(tv); } float tex_anim_offset = 0.0; if (curve_parameters[PARAM_ANIM_OFFSET].is_valid()) { - tex_anim_offset = curve_parameters[PARAM_ANIM_OFFSET]->interpolate(p.custom[1]); + tex_anim_offset = curve_parameters[PARAM_ANIM_OFFSET]->interpolate(tv); } Vector2 force = gravity; @@ -869,12 +873,12 @@ void CPUParticles2D::_particles_process(float p_delta) { float tex_scale = 1.0; if (curve_parameters[PARAM_SCALE].is_valid()) { - tex_scale = curve_parameters[PARAM_SCALE]->interpolate(p.custom[1]); + tex_scale = curve_parameters[PARAM_SCALE]->interpolate(tv); } float tex_hue_variation = 0.0; if (curve_parameters[PARAM_HUE_VARIATION].is_valid()) { - tex_hue_variation = curve_parameters[PARAM_HUE_VARIATION]->interpolate(p.custom[1]); + tex_hue_variation = curve_parameters[PARAM_HUE_VARIATION]->interpolate(tv); } float hue_rot_angle = (parameters[PARAM_HUE_VARIATION] + tex_hue_variation) * Math_TAU * Math::lerp(1.0f, p.hue_rot_rand * 2.0f - 1.0f, randomness[PARAM_HUE_VARIATION]); @@ -893,7 +897,7 @@ void CPUParticles2D::_particles_process(float p_delta) { } if (color_ramp.is_valid()) { - p.color = color_ramp->get_color_at_offset(p.custom[1]) * color; + p.color = color_ramp->get_color_at_offset(tv) * color; } else { p.color = color; } diff --git a/scene/3d/cpu_particles_3d.cpp b/scene/3d/cpu_particles_3d.cpp index c36c135fe6..85b502e7a0 100644 --- a/scene/3d/cpu_particles_3d.cpp +++ b/scene/3d/cpu_particles_3d.cpp @@ -646,6 +646,8 @@ void CPUParticles3D::_particles_process(float p_delta) { restart = true; } + float tv = 0.0; + if (restart) { if (!emitting) { p.active = false; @@ -660,12 +662,12 @@ void CPUParticles3D::_particles_process(float p_delta) { float tex_angle = 0.0; if (curve_parameters[PARAM_ANGLE].is_valid()) { - tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(0); + tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(tv); } float 
tex_anim_offset = 0.0; if (curve_parameters[PARAM_ANGLE].is_valid()) { - tex_anim_offset = curve_parameters[PARAM_ANGLE]->interpolate(0); + tex_anim_offset = curve_parameters[PARAM_ANGLE]->interpolate(tv); } p.seed = Math::rand(); @@ -772,61 +774,63 @@ void CPUParticles3D::_particles_process(float p_delta) { continue; } else if (p.time > p.lifetime) { p.active = false; + tv = 1.0; } else { uint32_t alt_seed = p.seed; p.time += local_delta; p.custom[1] = p.time / lifetime; + tv = p.time / p.lifetime; float tex_linear_velocity = 0.0; if (curve_parameters[PARAM_INITIAL_LINEAR_VELOCITY].is_valid()) { - tex_linear_velocity = curve_parameters[PARAM_INITIAL_LINEAR_VELOCITY]->interpolate(p.custom[1]); + tex_linear_velocity = curve_parameters[PARAM_INITIAL_LINEAR_VELOCITY]->interpolate(tv); } float tex_orbit_velocity = 0.0; if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) { if (curve_parameters[PARAM_ORBIT_VELOCITY].is_valid()) { - tex_orbit_velocity = curve_parameters[PARAM_ORBIT_VELOCITY]->interpolate(p.custom[1]); + tex_orbit_velocity = curve_parameters[PARAM_ORBIT_VELOCITY]->interpolate(tv); } } float tex_angular_velocity = 0.0; if (curve_parameters[PARAM_ANGULAR_VELOCITY].is_valid()) { - tex_angular_velocity = curve_parameters[PARAM_ANGULAR_VELOCITY]->interpolate(p.custom[1]); + tex_angular_velocity = curve_parameters[PARAM_ANGULAR_VELOCITY]->interpolate(tv); } float tex_linear_accel = 0.0; if (curve_parameters[PARAM_LINEAR_ACCEL].is_valid()) { - tex_linear_accel = curve_parameters[PARAM_LINEAR_ACCEL]->interpolate(p.custom[1]); + tex_linear_accel = curve_parameters[PARAM_LINEAR_ACCEL]->interpolate(tv); } float tex_tangential_accel = 0.0; if (curve_parameters[PARAM_TANGENTIAL_ACCEL].is_valid()) { - tex_tangential_accel = curve_parameters[PARAM_TANGENTIAL_ACCEL]->interpolate(p.custom[1]); + tex_tangential_accel = curve_parameters[PARAM_TANGENTIAL_ACCEL]->interpolate(tv); } float tex_radial_accel = 0.0; if (curve_parameters[PARAM_RADIAL_ACCEL].is_valid()) { - tex_radial_accel 
= curve_parameters[PARAM_RADIAL_ACCEL]->interpolate(p.custom[1]); + tex_radial_accel = curve_parameters[PARAM_RADIAL_ACCEL]->interpolate(tv); } float tex_damping = 0.0; if (curve_parameters[PARAM_DAMPING].is_valid()) { - tex_damping = curve_parameters[PARAM_DAMPING]->interpolate(p.custom[1]); + tex_damping = curve_parameters[PARAM_DAMPING]->interpolate(tv); } float tex_angle = 0.0; if (curve_parameters[PARAM_ANGLE].is_valid()) { - tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(p.custom[1]); + tex_angle = curve_parameters[PARAM_ANGLE]->interpolate(tv); } float tex_anim_speed = 0.0; if (curve_parameters[PARAM_ANIM_SPEED].is_valid()) { - tex_anim_speed = curve_parameters[PARAM_ANIM_SPEED]->interpolate(p.custom[1]); + tex_anim_speed = curve_parameters[PARAM_ANIM_SPEED]->interpolate(tv); } float tex_anim_offset = 0.0; if (curve_parameters[PARAM_ANIM_OFFSET].is_valid()) { - tex_anim_offset = curve_parameters[PARAM_ANIM_OFFSET]->interpolate(p.custom[1]); + tex_anim_offset = curve_parameters[PARAM_ANIM_OFFSET]->interpolate(tv); } Vector3 force = gravity; @@ -888,12 +892,12 @@ void CPUParticles3D::_particles_process(float p_delta) { float tex_scale = 1.0; if (curve_parameters[PARAM_SCALE].is_valid()) { - tex_scale = curve_parameters[PARAM_SCALE]->interpolate(p.custom[1]); + tex_scale = curve_parameters[PARAM_SCALE]->interpolate(tv); } float tex_hue_variation = 0.0; if (curve_parameters[PARAM_HUE_VARIATION].is_valid()) { - tex_hue_variation = curve_parameters[PARAM_HUE_VARIATION]->interpolate(p.custom[1]); + tex_hue_variation = curve_parameters[PARAM_HUE_VARIATION]->interpolate(tv); } float hue_rot_angle = (parameters[PARAM_HUE_VARIATION] + tex_hue_variation) * Math_TAU * Math::lerp(1.0f, p.hue_rot_rand * 2.0f - 1.0f, randomness[PARAM_HUE_VARIATION]); @@ -912,7 +916,7 @@ void CPUParticles3D::_particles_process(float p_delta) { } if (color_ramp.is_valid()) { - p.color = color_ramp->get_color_at_offset(p.custom[1]) * color; + p.color = 
color_ramp->get_color_at_offset(tv) * color; } else { p.color = color; } diff --git a/scene/3d/visual_instance_3d.cpp b/scene/3d/visual_instance_3d.cpp index 1d0a830383..dd731d13bd 100644 --- a/scene/3d/visual_instance_3d.cpp +++ b/scene/3d/visual_instance_3d.cpp @@ -371,7 +371,7 @@ void GeometryInstance3D::_bind_methods() { ClassDB::bind_method(D_METHOD("set_gi_mode", "mode"), &GeometryInstance3D::set_gi_mode); ClassDB::bind_method(D_METHOD("get_gi_mode"), &GeometryInstance3D::get_gi_mode); - ClassDB::bind_method(D_METHOD("set_lod_bias", "p_bias"), &GeometryInstance3D::set_lod_bias); + ClassDB::bind_method(D_METHOD("set_lod_bias", "bias"), &GeometryInstance3D::set_lod_bias); ClassDB::bind_method(D_METHOD("get_lod_bias"), &GeometryInstance3D::get_lod_bias); ClassDB::bind_method(D_METHOD("set_custom_aabb", "aabb"), &GeometryInstance3D::set_custom_aabb); diff --git a/scene/animation/animation_tree.cpp b/scene/animation/animation_tree.cpp index e6abbc0c7a..54523cc390 100644 --- a/scene/animation/animation_tree.cpp +++ b/scene/animation/animation_tree.cpp @@ -820,6 +820,7 @@ void AnimationTree::_process_graph(float p_delta) { Ref<Animation> a = as.animation; float time = as.time; float delta = as.delta; + float weight = as.blend; bool seeked = as.seeked; for (int i = 0; i < a->get_track_count(); i++) { @@ -839,7 +840,7 @@ void AnimationTree::_process_graph(float p_delta) { ERR_CONTINUE(blend_idx < 0 || blend_idx >= state.track_count); - float blend = (*as.track_blends)[blend_idx]; + float blend = (*as.track_blends)[blend_idx] * weight; if (blend < CMP_EPSILON) { continue; //nothing to blend diff --git a/scene/gui/graph_edit.cpp b/scene/gui/graph_edit.cpp index bc87aabb2c..d7602bd7cf 100644 --- a/scene/gui/graph_edit.cpp +++ b/scene/gui/graph_edit.cpp @@ -1593,7 +1593,7 @@ void GraphEdit::_bind_methods() { ClassDB::bind_method(D_METHOD("remove_valid_connection_type", "from_type", "to_type"), &GraphEdit::remove_valid_connection_type); 
ClassDB::bind_method(D_METHOD("is_valid_connection_type", "from_type", "to_type"), &GraphEdit::is_valid_connection_type); - ClassDB::bind_method(D_METHOD("set_zoom", "p_zoom"), &GraphEdit::set_zoom); + ClassDB::bind_method(D_METHOD("set_zoom", "zoom"), &GraphEdit::set_zoom); ClassDB::bind_method(D_METHOD("get_zoom"), &GraphEdit::get_zoom); ClassDB::bind_method(D_METHOD("set_snap", "pixels"), &GraphEdit::set_snap); @@ -1608,9 +1608,9 @@ void GraphEdit::_bind_methods() { ClassDB::bind_method(D_METHOD("set_connection_lines_antialiased", "pixels"), &GraphEdit::set_connection_lines_antialiased); ClassDB::bind_method(D_METHOD("is_connection_lines_antialiased"), &GraphEdit::is_connection_lines_antialiased); - ClassDB::bind_method(D_METHOD("set_minimap_size", "p_size"), &GraphEdit::set_minimap_size); + ClassDB::bind_method(D_METHOD("set_minimap_size", "size"), &GraphEdit::set_minimap_size); ClassDB::bind_method(D_METHOD("get_minimap_size"), &GraphEdit::get_minimap_size); - ClassDB::bind_method(D_METHOD("set_minimap_opacity", "p_opacity"), &GraphEdit::set_minimap_opacity); + ClassDB::bind_method(D_METHOD("set_minimap_opacity", "opacity"), &GraphEdit::set_minimap_opacity); ClassDB::bind_method(D_METHOD("get_minimap_opacity"), &GraphEdit::get_minimap_opacity); ClassDB::bind_method(D_METHOD("set_minimap_enabled", "enable"), &GraphEdit::set_minimap_enabled); diff --git a/scene/gui/texture_button.cpp b/scene/gui/texture_button.cpp index 23c48b0906..bd670555ea 100644 --- a/scene/gui/texture_button.cpp +++ b/scene/gui/texture_button.cpp @@ -29,7 +29,9 @@ /*************************************************************************/ #include "texture_button.h" + #include "core/typedefs.h" + #include <stdlib.h> Size2 TextureButton::get_minimum_size() const { @@ -247,8 +249,8 @@ void TextureButton::_bind_methods() { ClassDB::bind_method(D_METHOD("set_disabled_texture", "texture"), &TextureButton::set_disabled_texture); ClassDB::bind_method(D_METHOD("set_focused_texture", "texture"), 
&TextureButton::set_focused_texture); ClassDB::bind_method(D_METHOD("set_click_mask", "mask"), &TextureButton::set_click_mask); - ClassDB::bind_method(D_METHOD("set_expand", "p_expand"), &TextureButton::set_expand); - ClassDB::bind_method(D_METHOD("set_stretch_mode", "p_mode"), &TextureButton::set_stretch_mode); + ClassDB::bind_method(D_METHOD("set_expand", "expand"), &TextureButton::set_expand); + ClassDB::bind_method(D_METHOD("set_stretch_mode", "mode"), &TextureButton::set_stretch_mode); ClassDB::bind_method(D_METHOD("set_flip_h", "enable"), &TextureButton::set_flip_h); ClassDB::bind_method(D_METHOD("is_flipped_h"), &TextureButton::is_flipped_h); ClassDB::bind_method(D_METHOD("set_flip_v", "enable"), &TextureButton::set_flip_v); diff --git a/scene/resources/particles_material.cpp b/scene/resources/particles_material.cpp index 3aa9f9b3bc..c5a295e13f 100644 --- a/scene/resources/particles_material.cpp +++ b/scene/resources/particles_material.cpp @@ -305,6 +305,7 @@ void ParticlesMaterial::_update_shader() { code += " ivec2 emission_tex_size = textureSize(emission_texture_points, 0);\n"; code += " ivec2 emission_tex_ofs = ivec2(point % emission_tex_size.x, point / emission_tex_size.x);\n"; } + code += " float tv = 0.0;\n"; code += " if (RESTART) {\n"; if (tex_parameters[PARAM_ANGLE].is_valid()) { @@ -407,64 +408,65 @@ void ParticlesMaterial::_update_shader() { code += " } else {\n"; code += " CUSTOM.y += DELTA / LIFETIME;\n"; + code += " tv = CUSTOM.y / CUSTOM.w;\n"; if (tex_parameters[PARAM_INITIAL_LINEAR_VELOCITY].is_valid()) { - code += " float tex_linear_velocity = textureLod(linear_velocity_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_linear_velocity = textureLod(linear_velocity_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_linear_velocity = 0.0;\n"; } if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) { if (tex_parameters[PARAM_ORBIT_VELOCITY].is_valid()) { - code += " float tex_orbit_velocity = 
textureLod(orbit_velocity_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_orbit_velocity = textureLod(orbit_velocity_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_orbit_velocity = 0.0;\n"; } } if (tex_parameters[PARAM_ANGULAR_VELOCITY].is_valid()) { - code += " float tex_angular_velocity = textureLod(angular_velocity_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_angular_velocity = textureLod(angular_velocity_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_angular_velocity = 0.0;\n"; } if (tex_parameters[PARAM_LINEAR_ACCEL].is_valid()) { - code += " float tex_linear_accel = textureLod(linear_accel_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_linear_accel = textureLod(linear_accel_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_linear_accel = 0.0;\n"; } if (tex_parameters[PARAM_RADIAL_ACCEL].is_valid()) { - code += " float tex_radial_accel = textureLod(radial_accel_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_radial_accel = textureLod(radial_accel_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_radial_accel = 0.0;\n"; } if (tex_parameters[PARAM_TANGENTIAL_ACCEL].is_valid()) { - code += " float tex_tangent_accel = textureLod(tangent_accel_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_tangent_accel = textureLod(tangent_accel_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_tangent_accel = 0.0;\n"; } if (tex_parameters[PARAM_DAMPING].is_valid()) { - code += " float tex_damping = textureLod(damping_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_damping = textureLod(damping_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_damping = 0.0;\n"; } if (tex_parameters[PARAM_ANGLE].is_valid()) { - code += " float tex_angle = textureLod(angle_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_angle = textureLod(angle_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { 
code += " float tex_angle = 0.0;\n"; } if (tex_parameters[PARAM_ANIM_SPEED].is_valid()) { - code += " float tex_anim_speed = textureLod(anim_speed_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_anim_speed = textureLod(anim_speed_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_anim_speed = 0.0;\n"; } if (tex_parameters[PARAM_ANIM_OFFSET].is_valid()) { - code += " float tex_anim_offset = textureLod(anim_offset_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_anim_offset = textureLod(anim_offset_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_anim_offset = 0.0;\n"; } @@ -526,13 +528,13 @@ void ParticlesMaterial::_update_shader() { // apply color // apply hue rotation if (tex_parameters[PARAM_SCALE].is_valid()) { - code += " float tex_scale = textureLod(scale_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_scale = textureLod(scale_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_scale = 1.0;\n"; } if (tex_parameters[PARAM_HUE_VARIATION].is_valid()) { - code += " float tex_hue_variation = textureLod(hue_variation_texture, vec2(CUSTOM.y, 0.0), 0.0).r;\n"; + code += " float tex_hue_variation = textureLod(hue_variation_texture, vec2(tv, 0.0), 0.0).r;\n"; } else { code += " float tex_hue_variation = 0.0;\n"; } @@ -553,7 +555,7 @@ void ParticlesMaterial::_update_shader() { code += " vec4(1.250, -1.050, -0.203, 0.0),\n"; code += " vec4(0.000, 0.000, 0.000, 0.0)) * hue_rot_s;\n"; if (color_ramp.is_valid()) { - code += " COLOR = hue_rot_mat * textureLod(color_ramp, vec2(CUSTOM.y, 0.0), 0.0);\n"; + code += " COLOR = hue_rot_mat * textureLod(color_ramp, vec2(tv, 0.0), 0.0);\n"; } else { code += " COLOR = hue_rot_mat * color_value;\n"; } diff --git a/scene/resources/sky_material.cpp b/scene/resources/sky_material.cpp index 1cdabe4662..b2efecb1cb 100644 --- a/scene/resources/sky_material.cpp +++ b/scene/resources/sky_material.cpp @@ -522,53 +522,59 @@ 
PhysicalSkyMaterial::PhysicalSkyMaterial() { code += "}\n\n"; code += "void fragment() {\n"; - code += "\tfloat zenith_angle = clamp( dot(UP, normalize(LIGHT0_DIRECTION)), -1.0, 1.0 );\n"; - code += "\tfloat sun_energy = max(0.0, 1.0 - exp(-((PI * 0.5) - acos(zenith_angle)))) * SUN_ENERGY * LIGHT0_ENERGY;\n"; - code += "\tfloat sun_fade = 1.0 - clamp(1.0 - exp(LIGHT0_DIRECTION.y), 0.0, 1.0);\n\n"; - - code += "\t// rayleigh coefficients\n"; - code += "\tfloat rayleigh_coefficient = rayleigh - ( 1.0 * ( 1.0 - sun_fade ) );\n"; - code += "\tvec3 rayleigh_beta = rayleigh_coefficient * rayleigh_color.rgb * 0.0001;\n"; - code += "\t// mie coefficients from Preetham\n"; - code += "\tvec3 mie_beta = turbidity * mie * mie_color.rgb * 0.000434;\n\n"; - - code += "\t// optical length\n"; - code += "\tfloat zenith = acos(max(0.0, dot(UP, EYEDIR)));\n"; - code += "\tfloat optical_mass = 1.0 / (cos(zenith) + 0.15 * pow(93.885 - degrees(zenith), -1.253));\n"; - code += "\tfloat rayleigh_scatter = rayleigh_zenith_size * optical_mass;\n"; - code += "\tfloat mie_scatter = mie_zenith_size * optical_mass;\n\n"; - - code += "\t// light extinction based on thickness of atmosphere\n"; - code += "\tvec3 extinction = exp(-(rayleigh_beta * rayleigh_scatter + mie_beta * mie_scatter));\n\n"; - - code += "\t// in scattering\n"; - code += "\tfloat cos_theta = dot(EYEDIR, normalize(LIGHT0_DIRECTION));\n\n"; - - code += "\tfloat rayleigh_phase = (3.0 / (16.0 * PI)) * (1.0 + pow(cos_theta * 0.5 + 0.5, 2.0));\n"; - code += "\tvec3 betaRTheta = rayleigh_beta * rayleigh_phase;\n\n"; - - code += "\tfloat mie_phase = henyey_greenstein(cos_theta, mie_eccentricity);\n"; - code += "\tvec3 betaMTheta = mie_beta * mie_phase;\n\n"; - - code += "\tvec3 Lin = pow(sun_energy * ((betaRTheta + betaMTheta) / (rayleigh_beta + mie_beta)) * (1.0 - extinction), vec3(1.5));\n"; - code += "\t// Hack from https://github.com/mrdoob/three.js/blob/master/examples/jsm/objects/Sky.js\n"; - code += "\tLin *= mix(vec3(1.0), 
pow(sun_energy * ((betaRTheta + betaMTheta) / (rayleigh_beta + mie_beta)) * extinction, vec3(0.5)), clamp(pow(1.0 - zenith_angle, 5.0), 0.0, 1.0));\n\n"; - - code += "\t// Hack in the ground color\n"; - code += "\tLin *= mix(ground_color.rgb, vec3(1.0), smoothstep(-0.1, 0.1, dot(UP, EYEDIR)));\n\n"; - - code += "\t// Solar disk and out-scattering\n"; - code += "\tfloat sunAngularDiameterCos = cos(LIGHT0_SIZE * sun_disk_scale);\n"; - code += "\tfloat sunAngularDiameterCos2 = cos(LIGHT0_SIZE * sun_disk_scale*0.5);\n"; - code += "\tfloat sundisk = smoothstep(sunAngularDiameterCos, sunAngularDiameterCos2, cos_theta);\n"; - code += "\tvec3 L0 = (sun_energy * 1900.0 * extinction) * sundisk * LIGHT0_COLOR;\n"; - code += "\tL0 += texture(night_sky, SKY_COORDS).xyz * extinction;\n\n"; - - code += "\tvec3 color = (Lin + L0) * 0.04;\n"; - code += "\tCOLOR = pow(color, vec3(1.0 / (1.2 + (1.2 * sun_fade))));\n"; - code += "\tCOLOR *= exposure;\n"; - code += "\t// Make optional, eliminates banding\n"; - code += "\tCOLOR += (hash(EYEDIR * 1741.9782) * 0.08 - 0.04) * 0.016 * dither_strength;\n"; + code += "\tif (LIGHT0_ENABLED) {\n"; + code += "\t\tfloat zenith_angle = clamp( dot(UP, normalize(LIGHT0_DIRECTION)), -1.0, 1.0 );\n"; + code += "\t\tfloat sun_energy = max(0.0, 1.0 - exp(-((PI * 0.5) - acos(zenith_angle)))) * SUN_ENERGY * LIGHT0_ENERGY;\n"; + code += "\t\tfloat sun_fade = 1.0 - clamp(1.0 - exp(LIGHT0_DIRECTION.y), 0.0, 1.0);\n\n"; + + code += "\t\t// rayleigh coefficients\n"; + code += "\t\tfloat rayleigh_coefficient = rayleigh - ( 1.0 * ( 1.0 - sun_fade ) );\n"; + code += "\t\tvec3 rayleigh_beta = rayleigh_coefficient * rayleigh_color.rgb * 0.0001;\n"; + code += "\t\t// mie coefficients from Preetham\n"; + code += "\t\tvec3 mie_beta = turbidity * mie * mie_color.rgb * 0.000434;\n\n"; + + code += "\t\t// optical length\n"; + code += "\t\tfloat zenith = acos(max(0.0, dot(UP, EYEDIR)));\n"; + code += "\t\tfloat optical_mass = 1.0 / (cos(zenith) + 0.15 * pow(93.885 - 
degrees(zenith), -1.253));\n"; + code += "\t\tfloat rayleigh_scatter = rayleigh_zenith_size * optical_mass;\n"; + code += "\t\tfloat mie_scatter = mie_zenith_size * optical_mass;\n\n"; + + code += "\t\t// light extinction based on thickness of atmosphere\n"; + code += "\t\tvec3 extinction = exp(-(rayleigh_beta * rayleigh_scatter + mie_beta * mie_scatter));\n\n"; + + code += "\t\t// in scattering\n"; + code += "\t\tfloat cos_theta = dot(EYEDIR, normalize(LIGHT0_DIRECTION));\n\n"; + + code += "\t\tfloat rayleigh_phase = (3.0 / (16.0 * PI)) * (1.0 + pow(cos_theta * 0.5 + 0.5, 2.0));\n"; + code += "\t\tvec3 betaRTheta = rayleigh_beta * rayleigh_phase;\n\n"; + + code += "\t\tfloat mie_phase = henyey_greenstein(cos_theta, mie_eccentricity);\n"; + code += "\t\tvec3 betaMTheta = mie_beta * mie_phase;\n\n"; + + code += "\t\tvec3 Lin = pow(sun_energy * ((betaRTheta + betaMTheta) / (rayleigh_beta + mie_beta)) * (1.0 - extinction), vec3(1.5));\n"; + code += "\t\t// Hack from https://github.com/mrdoob/three.js/blob/master/examples/jsm/objects/Sky.js\n"; + code += "\t\tLin *= mix(vec3(1.0), pow(sun_energy * ((betaRTheta + betaMTheta) / (rayleigh_beta + mie_beta)) * extinction, vec3(0.5)), clamp(pow(1.0 - zenith_angle, 5.0), 0.0, 1.0));\n\n"; + + code += "\t\t// Hack in the ground color\n"; + code += "\t\tLin *= mix(ground_color.rgb, vec3(1.0), smoothstep(-0.1, 0.1, dot(UP, EYEDIR)));\n\n"; + + code += "\t\t// Solar disk and out-scattering\n"; + code += "\t\tfloat sunAngularDiameterCos = cos(LIGHT0_SIZE * sun_disk_scale);\n"; + code += "\t\tfloat sunAngularDiameterCos2 = cos(LIGHT0_SIZE * sun_disk_scale*0.5);\n"; + code += "\t\tfloat sundisk = smoothstep(sunAngularDiameterCos, sunAngularDiameterCos2, cos_theta);\n"; + code += "\t\tvec3 L0 = (sun_energy * 1900.0 * extinction) * sundisk * LIGHT0_COLOR;\n"; + code += "\t\tL0 += texture(night_sky, SKY_COORDS).xyz * extinction;\n\n"; + + code += "\t\tvec3 color = (Lin + L0) * 0.04;\n"; + code += "\t\tCOLOR = pow(color, vec3(1.0 / (1.2 
+ (1.2 * sun_fade))));\n"; + code += "\t\tCOLOR *= exposure;\n"; + code += "\t\t// Make optional, eliminates banding\n"; + code += "\t\tCOLOR += (hash(EYEDIR * 1741.9782) * 0.08 - 0.04) * 0.016 * dither_strength;\n"; + code += "\t} else {\n"; + code += "\t\t// There is no sun, so display night_sky and nothing else\n"; + code += "\t\tCOLOR = texture(night_sky, SKY_COORDS).xyz * 0.04;\n"; + code += "\t\tCOLOR *= exposure;\n"; + code += "\t}\n"; code += "}\n"; shader = RS::get_singleton()->shader_create(); @@ -591,5 +597,4 @@ PhysicalSkyMaterial::PhysicalSkyMaterial() { PhysicalSkyMaterial::~PhysicalSkyMaterial() { RS::get_singleton()->free(shader); - RS::get_singleton()->material_set_shader(_get_material(), RID()); } diff --git a/scene/resources/syntax_highlighter.cpp b/scene/resources/syntax_highlighter.cpp index f3f881a774..9dd00849f4 100644 --- a/scene/resources/syntax_highlighter.cpp +++ b/scene/resources/syntax_highlighter.cpp @@ -110,16 +110,13 @@ TextEdit *SyntaxHighlighter::get_text_edit() { } void SyntaxHighlighter::_bind_methods() { - ClassDB::bind_method(D_METHOD("get_line_syntax_highlighting", "p_line"), &SyntaxHighlighter::get_line_syntax_highlighting); + ClassDB::bind_method(D_METHOD("get_line_syntax_highlighting", "line"), &SyntaxHighlighter::get_line_syntax_highlighting); ClassDB::bind_method(D_METHOD("update_cache"), &SyntaxHighlighter::update_cache); ClassDB::bind_method(D_METHOD("clear_highlighting_cache"), &SyntaxHighlighter::clear_highlighting_cache); ClassDB::bind_method(D_METHOD("get_text_edit"), &SyntaxHighlighter::get_text_edit); - ClassDB::bind_method(D_METHOD("_get_line_syntax_highlighting", "p_line"), &SyntaxHighlighter::_get_line_syntax_highlighting); - ClassDB::bind_method(D_METHOD("_update_cache"), &SyntaxHighlighter::_update_cache); - ClassDB::bind_method(D_METHOD("_clear_highlighting_cache"), &SyntaxHighlighter::_clear_highlighting_cache); - - BIND_VMETHOD(MethodInfo(Variant::DICTIONARY, "_get_line_syntax_highlighting", 
PropertyInfo(Variant::INT, "p_line"))); + BIND_VMETHOD(MethodInfo(Variant::DICTIONARY, "_get_line_syntax_highlighting", PropertyInfo(Variant::INT, "line"))); + BIND_VMETHOD(MethodInfo("_clear_highlighting_cache")); BIND_VMETHOD(MethodInfo("_update_cache")); } @@ -576,11 +573,11 @@ void CodeHighlighter::_bind_methods() { ClassDB::bind_method(D_METHOD("clear_member_keyword_colors"), &CodeHighlighter::clear_member_keyword_colors); ClassDB::bind_method(D_METHOD("get_member_keyword_colors"), &CodeHighlighter::get_member_keyword_colors); - ClassDB::bind_method(D_METHOD("add_color_region", "p_start_key", "p_end_key", "p_color", "p_line_only"), &CodeHighlighter::add_color_region, DEFVAL(false)); - ClassDB::bind_method(D_METHOD("remove_color_region", "p_start_key"), &CodeHighlighter::remove_color_region); - ClassDB::bind_method(D_METHOD("has_color_region", "p_start_key"), &CodeHighlighter::has_color_region); + ClassDB::bind_method(D_METHOD("add_color_region", "start_key", "end_key", "color", "line_only"), &CodeHighlighter::add_color_region, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("remove_color_region", "start_key"), &CodeHighlighter::remove_color_region); + ClassDB::bind_method(D_METHOD("has_color_region", "start_key"), &CodeHighlighter::has_color_region); - ClassDB::bind_method(D_METHOD("set_color_regions", "p_color_regions"), &CodeHighlighter::set_color_regions); + ClassDB::bind_method(D_METHOD("set_color_regions", "color_regions"), &CodeHighlighter::set_color_regions); ClassDB::bind_method(D_METHOD("clear_color_regions"), &CodeHighlighter::clear_color_regions); ClassDB::bind_method(D_METHOD("get_color_regions"), &CodeHighlighter::get_color_regions); diff --git a/servers/audio/effects/audio_effect_capture.cpp b/servers/audio/effects/audio_effect_capture.cpp new file mode 100644 index 0000000000..f37938eec8 --- /dev/null +++ b/servers/audio/effects/audio_effect_capture.cpp @@ -0,0 +1,140 @@ 
+/*************************************************************************/ +/* audio_effect_capture.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "audio_effect_capture.h" + +bool AudioEffectCapture::can_get_buffer(int p_frames) const { + return buffer.data_left() >= p_frames; +} + +PackedVector2Array AudioEffectCapture::get_buffer(int p_frames) { + ERR_FAIL_COND_V(!buffer_initialized, PackedVector2Array()); + ERR_FAIL_INDEX_V(p_frames, buffer.size(), PackedVector2Array()); + int data_left = buffer.data_left(); + if (data_left < p_frames || p_frames == 0) { + return PackedVector2Array(); + } + + PackedVector2Array ret; + ret.resize(p_frames); + + Vector<AudioFrame> streaming_data; + streaming_data.resize(p_frames); + buffer.read(streaming_data.ptrw(), p_frames); + for (int32_t i = 0; i < p_frames; i++) { + ret.write[i] = Vector2(streaming_data[i].l, streaming_data[i].r); + } + return ret; +} + +void AudioEffectCapture::clear_buffer() { + const int32_t data_left = buffer.data_left(); + buffer.advance_read(data_left); +} + +void AudioEffectCapture::_bind_methods() { + ClassDB::bind_method(D_METHOD("can_get_buffer", "frames"), &AudioEffectCapture::can_get_buffer); + ClassDB::bind_method(D_METHOD("get_buffer", "frames"), &AudioEffectCapture::get_buffer); + ClassDB::bind_method(D_METHOD("clear_buffer"), &AudioEffectCapture::clear_buffer); + ClassDB::bind_method(D_METHOD("set_buffer_length", "buffer_length_seconds"), &AudioEffectCapture::set_buffer_length); + ClassDB::bind_method(D_METHOD("get_buffer_length"), &AudioEffectCapture::get_buffer_length); + ClassDB::bind_method(D_METHOD("get_frames_available"), &AudioEffectCapture::get_frames_available); + ClassDB::bind_method(D_METHOD("get_discarded_frames"), &AudioEffectCapture::get_discarded_frames); + ClassDB::bind_method(D_METHOD("get_buffer_length_frames"), &AudioEffectCapture::get_buffer_length_frames); + ClassDB::bind_method(D_METHOD("get_pushed_frames"), &AudioEffectCapture::get_pushed_frames); + + ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "buffer_length", 
PROPERTY_HINT_RANGE, "0.01,10,0.01"), "set_buffer_length", "get_buffer_length"); +} + +Ref<AudioEffectInstance> AudioEffectCapture::instance() { + if (!buffer_initialized) { + float target_buffer_size = AudioServer::get_singleton()->get_mix_rate() * buffer_length_seconds; + ERR_FAIL_COND_V(target_buffer_size <= 0 || target_buffer_size >= (1 << 27), Ref<AudioEffectInstance>()); + buffer.resize(nearest_shift((int)target_buffer_size)); + buffer_initialized = true; + } + + clear_buffer(); + + Ref<AudioEffectCaptureInstance> ins; + ins.instance(); + ins->base = Ref<AudioEffectCapture>(this); + + return ins; +} + +void AudioEffectCapture::set_buffer_length(float p_buffer_length_seconds) { + ERR_FAIL_COND(buffer_initialized); + + buffer_length_seconds = p_buffer_length_seconds; +} + +float AudioEffectCapture::get_buffer_length() { + return buffer_length_seconds; +} + +int AudioEffectCapture::get_frames_available() const { + ERR_FAIL_COND_V(!buffer_initialized, 0); + return buffer.data_left(); +} + +int64_t AudioEffectCapture::get_discarded_frames() const { + return discarded_frames; +} + +int AudioEffectCapture::get_buffer_length_frames() const { + ERR_FAIL_COND_V(!buffer_initialized, 0); + return buffer.size(); +} + +int64_t AudioEffectCapture::get_pushed_frames() const { + return pushed_frames; +} + +void AudioEffectCaptureInstance::process(const AudioFrame *p_src_frames, AudioFrame *p_dst_frames, int p_frame_count) { + RingBuffer<AudioFrame> &buffer = base->buffer; + + for (int i = 0; i < p_frame_count; i++) { + p_dst_frames[i] = p_src_frames[i]; + } + + if (buffer.space_left() >= p_frame_count) { + // Add incoming audio frames to the IO ring buffer + int32_t ret = buffer.write(p_src_frames, p_frame_count); + ERR_FAIL_COND_MSG(ret != p_frame_count, "Failed to add data to effect capture ring buffer despite sufficient space."); + atomic_add(&base->pushed_frames, p_frame_count); + } else { + atomic_add(&base->discarded_frames, p_frame_count); + } +} + +bool 
AudioEffectCaptureInstance::process_silence() const { + return true; +} diff --git a/servers/audio/effects/audio_effect_capture.h b/servers/audio/effects/audio_effect_capture.h new file mode 100644 index 0000000000..b154be85de --- /dev/null +++ b/servers/audio/effects/audio_effect_capture.h @@ -0,0 +1,82 @@ +/*************************************************************************/ +/* audio_effect_capture.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#ifndef AUDIO_EFFECT_CAPTURE_H +#define AUDIO_EFFECT_CAPTURE_H + +#include "core/config/engine.h" +#include "core/math/audio_frame.h" +#include "core/object/reference.h" +#include "core/templates/vector.h" +#include "servers/audio/audio_effect.h" +#include "servers/audio_server.h" + +class AudioEffectCapture; + +class AudioEffectCaptureInstance : public AudioEffectInstance { + GDCLASS(AudioEffectCaptureInstance, AudioEffectInstance); + friend class AudioEffectCapture; + Ref<AudioEffectCapture> base; + +public: + virtual void process(const AudioFrame *p_src_frames, AudioFrame *p_dst_frames, int p_frame_count) override; + virtual bool process_silence() const override; +}; + +class AudioEffectCapture : public AudioEffect { + GDCLASS(AudioEffectCapture, AudioEffect) + friend class AudioEffectCaptureInstance; + + RingBuffer<AudioFrame> buffer; + uint64_t discarded_frames = 0; + uint64_t pushed_frames = 0; + float buffer_length_seconds = 0.1f; + bool buffer_initialized = false; + +protected: + static void _bind_methods(); + +public: + virtual Ref<AudioEffectInstance> instance() override; + + void set_buffer_length(float p_buffer_length_seconds); + float get_buffer_length(); + + bool can_get_buffer(int p_frames) const; + PackedVector2Array get_buffer(int p_len); + void clear_buffer(); + + int get_frames_available() const; + int64_t get_discarded_frames() const; + int get_buffer_length_frames() const; + int64_t get_pushed_frames() const; +}; + +#endif // AUDIO_EFFECT_CAPTURE_H diff --git a/servers/audio_server.cpp b/servers/audio_server.cpp index d4f7876b4b..16c6a26595 100644 --- a/servers/audio_server.cpp +++ b/servers/audio_server.cpp @@ -401,6 +401,7 @@ void AudioServer::_mix_step() { for (int k = 0; k < bus->channels.size(); k++) { if (!bus->channels[k].active) { + bus->channels.write[k].peak_volume = AudioFrame(AUDIO_MIN_PEAK_DB, AUDIO_MIN_PEAK_DB); continue; } @@ -434,7 +435,7 @@ void 
AudioServer::_mix_step() { } } - bus->channels.write[k].peak_volume = AudioFrame(Math::linear2db(peak.l + 0.0000000001), Math::linear2db(peak.r + 0.0000000001)); + bus->channels.write[k].peak_volume = AudioFrame(Math::linear2db(peak.l + AUDIO_PEAK_OFFSET), Math::linear2db(peak.r + AUDIO_PEAK_OFFSET)); if (!bus->channels[k].used) { //see if any audio is contained, because channel was not used diff --git a/servers/audio_server.h b/servers/audio_server.h index 51fbc59851..a1a373e1ca 100644 --- a/servers/audio_server.h +++ b/servers/audio_server.h @@ -199,7 +199,7 @@ private: last_mix_with_audio = 0; used = false; active = false; - peak_volume = AudioFrame(0, 0); + peak_volume = AudioFrame(AUDIO_MIN_PEAK_DB, AUDIO_MIN_PEAK_DB); } }; diff --git a/servers/register_server_types.cpp b/servers/register_server_types.cpp index 58bcdf5802..50efd7c554 100644 --- a/servers/register_server_types.cpp +++ b/servers/register_server_types.cpp @@ -36,6 +36,7 @@ #include "audio/audio_effect.h" #include "audio/audio_stream.h" #include "audio/effects/audio_effect_amplify.h" +#include "audio/effects/audio_effect_capture.h" #include "audio/effects/audio_effect_chorus.h" #include "audio/effects/audio_effect_compressor.h" #include "audio/effects/audio_effect_delay.h" @@ -166,6 +167,8 @@ void register_server_types() { ClassDB::register_class<AudioEffectRecord>(); ClassDB::register_class<AudioEffectSpectrumAnalyzer>(); ClassDB::register_virtual_class<AudioEffectSpectrumAnalyzerInstance>(); + + ClassDB::register_class<AudioEffectCapture>(); } ClassDB::register_virtual_class<RenderingDevice>(); diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp index c35e5e1730..0fdd864d47 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.cpp +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -400,12 +400,14 @@ void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatr void 
ClusterBuilderRD::bake_cluster() { RENDER_TIMESTAMP(">Bake Cluster"); + RD::get_singleton()->draw_command_begin_label("Bake Light Cluster"); + //clear cluster buffer - RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size); + RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); if (render_element_count > 0) { //clear render buffer - RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size); + RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, RD::BARRIER_MASK_RASTER); { //fill state uniform @@ -420,12 +422,12 @@ void ClusterBuilderRD::bake_cluster() { state.cluster_depth_offset = (render_element_max / 32); state.cluster_data_size = state.cluster_depth_offset + render_element_max; - RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state); + RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } //update instances - RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements); + RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); RENDER_TIMESTAMP("Render Elements"); @@ -469,7 +471,7 @@ void ClusterBuilderRD::bake_cluster() { RD::get_singleton()->draw_list_draw(draw_list, true, instances); i += instances; } - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE); } //store elements RENDER_TIMESTAMP("Pack Elements"); @@ -491,12 +493,15 @@ void ClusterBuilderRD::bake_cluster() { RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant)); - 
RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } + } else { + RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } RENDER_TIMESTAMP("<Bake Cluster"); + RD::get_singleton()->draw_command_end_label(); } void ClusterBuilderRD::debug(ElementType p_element) { @@ -519,7 +524,7 @@ void ClusterBuilderRD::debug(ElementType p_element) { RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1); RD::get_singleton()->compute_list_end(); } diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index f1bab19445..8269328597 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -299,15 +299,12 @@ void EffectsRD::copy_to_rect(RID p_source_rd_texture, RID p_dest_texture, const copy.push_constant.target[0] = p_rect.position.x; copy.push_constant.target[1] = p_rect.position.y; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8_bit_dst ? 
COPY_MODE_SIMPLY_COPY_8BIT : COPY_MODE_SIMPLY_COPY]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -322,15 +319,12 @@ void EffectsRD::copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama, copy.push_constant.target[1] = 0; copy.push_constant.camera_z_far = p_lod; - int32_t x_groups = (p_panorama_size.width - 1) / 8 + 1; - int32_t y_groups = (p_panorama_size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_is_array ? 
COPY_MODE_CUBE_ARRAY_TO_PANORAMA : COPY_MODE_CUBE_TO_PANORAMA]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_cube), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_panorama), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_panorama_size.width, p_panorama_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -349,15 +343,12 @@ void EffectsRD::copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_ copy.push_constant.camera_z_far = p_z_far; copy.push_constant.camera_z_near = p_z_near; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_LINEARIZE_DEPTH]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -374,15 +365,12 @@ void EffectsRD::copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_texture, copy.push_constant.target[0] = p_rect.position.x; copy.push_constant.target[1] = p_rect.position.y; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - 
int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_SIMPLY_COPY_DEPTH]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -400,14 +388,11 @@ void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i copy.push_constant.set_color[2] = p_color.b; copy.push_constant.set_color[3] = p_color.a; - int32_t x_groups = (p_region.size.width - 1) / 8 + 1; - int32_t y_groups = (p_region.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? 
COPY_MODE_SET_COLOR_8BIT : COPY_MODE_SET_COLOR]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -420,8 +405,6 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.section[2] = p_region.size.width; copy.push_constant.section[3] = p_region.size.height; - int32_t x_groups = (p_region.size.width - 1) / 8 + 1; - int32_t y_groups = (p_region.size.height - 1) / 8 + 1; //HORIZONTAL RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? 
COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]); @@ -431,7 +414,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL; RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -442,7 +425,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.flags = base_flags; RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -452,9 +435,6 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const CopyMode copy_mode = p_first_pass && p_auto_exposure.is_valid() ? COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE : COPY_MODE_GAUSSIAN_GLOW; uint32_t base_flags = 0; - int32_t x_groups = (p_size.width + 7) / 8; - int32_t y_groups = (p_size.height + 7) / 8; - copy.push_constant.section[2] = p_size.x; copy.push_constant.section[3] = p_size.y; @@ -479,16 +459,13 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const copy.push_constant.flags = base_flags | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0) | (p_high_quality ? 
COPY_FLAG_HIGH_QUALITY_GLOW : 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); RD::get_singleton()->compute_list_end(); } void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - int32_t x_groups = (p_screen_size.width - 1) / 8 + 1; - int32_t y_groups = (p_screen_size.height - 1) / 8 + 1; - { //scale color and depth to half ssr_scale.push_constant.camera_z_far = p_camera.get_z_far(); ssr_scale.push_constant.camera_z_near = p_camera.get_z_near(); @@ -506,7 +483,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_scale.push_constant, sizeof(ScreenSpaceReflectionScalePushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); } @@ -547,7 +524,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R } RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 2); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + 
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); } if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) { @@ -585,7 +562,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -600,7 +577,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); } RD::get_singleton()->compute_list_end(); @@ -609,9 +586,6 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RenderingServer::SubSurfaceScatteringQuality p_quality) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - int32_t x_groups = (p_screen_size.width - 1) / 8 + 1; - int32_t y_groups = (p_screen_size.height - 1) / 8 + 1; - Plane p = p_camera.xform4(Plane(1, 0, -1, 1)); p.normal /= p.d; float unit_size = p.normal.x; @@ -635,7 +609,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, 
sizeof(SubSurfaceScatteringPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -646,7 +620,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept sss.push_constant.vertical = true; RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -690,15 +664,12 @@ void EffectsRD::make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const S copy.push_constant.section[2] = p_size.width; copy.push_constant.section[3] = p_size.height; - int32_t x_groups = (p_size.width - 1) / 8 + 1; - int32_t y_groups = (p_size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_MIPMAP]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -719,7 +690,7 @@ void EffectsRD::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffe 
RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant)); RD::get_singleton()->draw_list_draw(draw_list, true); - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER); } void EffectsRD::tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings) { @@ -804,10 +775,7 @@ void EffectsRD::luminance_reduction(RID p_source_texture, const Size2i p_source_ RD::get_singleton()->compute_list_set_push_constant(compute_list, &luminance_reduce.push_constant, sizeof(LuminanceReducePushConstant)); - int32_t x_groups = (luminance_reduce.push_constant.source_size[0] - 1) / 8 + 1; - int32_t y_groups = (luminance_reduce.push_constant.source_size[1] - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, luminance_reduce.push_constant.source_size[0], luminance_reduce.push_constant.source_size[1], 1); luminance_reduce.push_constant.source_size[0] = MAX(luminance_reduce.push_constant.source_size[0] / 8, 1); luminance_reduce.push_constant.source_size[1] = MAX(luminance_reduce.push_constant.source_size[1] / 8, 1); @@ -848,14 +816,12 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_texture), 1); - int32_t x_groups = (p_base_texture_size.x - 1) / 8 + 1; - int32_t y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - 
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); if (p_bokeh_shape == RS::DOF_BOKEH_BOX || p_bokeh_shape == RS::DOF_BOKEH_HEXAGON) { @@ -872,8 +838,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); - x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1; - y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.half_size = true; @@ -887,7 +851,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //third pass @@ -903,7 +867,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || 
p_quality == RS::DOF_BLUR_QUALITY_LOW) { @@ -914,8 +878,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture2), 1); - x_groups = (p_base_texture_size.x - 1) / 8 + 1; - y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.half_size = false; @@ -923,7 +885,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); } } else { //circle @@ -941,15 +903,13 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); - x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1; - y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.half_size = true; RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], 
bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //circle is just one pass, then upscale @@ -961,8 +921,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture1), 1); - x_groups = (p_base_texture_size.x - 1) / 8 + 1; - y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.half_size = false; @@ -970,7 +928,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); } RD::get_singleton()->compute_list_end(); @@ -995,10 +953,9 @@ void EffectsRD::gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID> RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2); RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); - int x_groups = ((p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = ((p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); - RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); } RD::get_singleton()->compute_list_add_barrier(p_compute_list); } @@ -1072,10 +1029,9 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep } RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample_push_constant, sizeof(SSAODownsamplePushConstant)); - int x_groups = (MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = (MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + Size2i size(MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)), MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1))); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->draw_command_end_label(); // Downsample SSAO } @@ -1193,21 +1149,19 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]); gather_ssao(compute_list, p_ao_pong_slices, p_settings, true); //generate importance map - int x_groups = (p_settings.quarter_screen_size.x - 1) / 8 + 1; - int y_groups = (p_settings.quarter_screen_size.y - 1) / 8 + 1; RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //process importance map A RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //process Importance Map B RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); @@ -1215,7 +1169,7 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, 
p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); @@ -1272,10 +1226,8 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep } RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); - int x_groups = ((p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = ((p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + Size2i size(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); } if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { @@ -1313,18 +1265,15 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); - int x_groups = (p_settings.full_screen_size.x - 1) / 8 + 1; - int y_groups = (p_settings.full_screen_size.y - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->draw_command_end_label(); // Interleave } RD::get_singleton()->draw_command_end_label(); //SSAO - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER); //wait for upcoming transfer int zero[1] = { 0 }; - 
RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier } void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { @@ -1337,12 +1286,9 @@ void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_normal), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_roughness), 1); - int x_groups = (p_size.x - 1) / 8 + 1; - int y_groups = (p_size.y - 1) / 8 + 1; - RD::get_singleton()->compute_list_set_push_constant(compute_list, &roughness_limiter.push_constant, sizeof(RoughnessLimiterPushConstant)); //not used but set anyway - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.x, p_size.y, 1); RD::get_singleton()->compute_list_end(); } @@ -1455,7 +1401,7 @@ void EffectsRD::render_sky(RD::DrawListID p_list, float p_time, RID p_fb, RID p_ RD::get_singleton()->draw_list_draw(draw_list, true); } -void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples) { +void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { ResolvePushConstant push_constant; push_constant.screen_size[0] = p_screen_size.x; push_constant.screen_size[1] = p_screen_size.y; @@ -1472,19 +1418,26 @@ void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RI 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(p_barrier); } void EffectsRD::reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RD::ComputeListID compute_list) { uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, (uint32_t)p_shrink_limit, 0, 0, 0 }; - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_REDUCE]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_source_shadow, p_dest_shadow), 0); + uint32_t height = p_source_rect.size.height; + if (true) { // subgroup support, @TODO must detect them + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[p_shrink_limit == 1 ? 
SHADOW_REDUCE_REDUCE_SUBGROUPS_8 : SHADOW_REDUCE_REDUCE_SUBGROUPS]); + height /= 2; //cause kernel is 8x4 + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_REDUCE]); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_shadow), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, height, 1); } void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RenderingServer::EnvVolumetricFogShadowFilter p_filter, RD::ComputeListID compute_list, bool p_vertical, bool p_horizontal) { uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, 0, 0, 0, 0 }; @@ -1506,9 +1459,10 @@ void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i & if (p_vertical) { push_constant[6] = 1; push_constant[7] = 0; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_shadow, p_backing_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_backing_shadow), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, 
p_source_rect.size.height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1); } if (p_vertical && p_horizontal) { RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -1516,9 +1470,10 @@ void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i & if (p_horizontal) { push_constant[6] = 0; push_constant[7] = 1; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_backing_shadow, p_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_backing_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_shadow), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1); } } @@ -2020,6 +1975,8 @@ EffectsRD::EffectsRD() { { Vector<String> shadow_reduce_modes; shadow_reduce_modes.push_back("\n#define MODE_REDUCE\n"); + shadow_reduce_modes.push_back("\n#define MODE_REDUCE_SUBGROUP\n"); + shadow_reduce_modes.push_back("\n#define MODE_REDUCE_SUBGROUP\n#define MODE_REDUCE_8\n"); shadow_reduce_modes.push_back("\n#define MODE_FILTER\n"); shadow_reduce.shader.initialize(shadow_reduce_modes); diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index 00309b4d0f..7ae5ea2f37 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -599,6 +599,8 @@ class EffectsRD { enum ShadowReduceMode { SHADOW_REDUCE_REDUCE, + SHADOW_REDUCE_REDUCE_SUBGROUPS, + SHADOW_REDUCE_REDUCE_SUBGROUPS_8, SHADOW_REDUCE_FILTER, 
SHADOW_REDUCE_MAX }; @@ -763,7 +765,7 @@ public: void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection); void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); - void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples); + void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); void reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RenderingDevice::ComputeListID compute_list); void filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RS::EnvVolumetricFogShadowFilter p_filter, RenderingDevice::ComputeListID compute_list, bool p_vertical = true, bool p_horizontal = true); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp index a20a5073c3..509495680a 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -453,7 +453,7 @@ void RendererSceneRenderForward::MaterialData::update_parameters(const Map<Strin //check whether buffer changed if (p_uniform_dirty && ubo_data.size()) { update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); - RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); + RD::get_singleton()->buffer_update(uniform_buffer, 0, 
ubo_data.size(), ubo_data.ptrw(), RD::BARRIER_MASK_RASTER); } uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); @@ -810,10 +810,20 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList bool shadow_pass = (p_params->pass_mode == PASS_MODE_SHADOW) || (p_params->pass_mode == PASS_MODE_SHADOW_DP); - float old_offset[2] = { 0, 0 }; + SceneState::PushConstant push_constant; + + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { + push_constant.uv_offset = Math::make_half_float(p_params->uv_offset.y) << 16; + push_constant.uv_offset |= Math::make_half_float(p_params->uv_offset.x); + } else { + push_constant.uv_offset = 0; + } for (uint32_t i = p_from_element; i < p_to_element; i++) { const GeometryInstanceSurfaceDataCache *surf = p_params->elements[i]; + const RenderElementInfo &element_info = p_params->element_info[i]; + + push_constant.base_index = i + p_params->element_offset; RID material_uniform_set; ShaderData *shader; @@ -834,13 +844,6 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList continue; } - if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { - old_offset[0] = surf->owner->push_constant.lightmap_uv_scale[0]; - old_offset[1] = surf->owner->push_constant.lightmap_uv_scale[1]; - surf->owner->push_constant.lightmap_uv_scale[0] = p_params->uv_offset.x; - surf->owner->push_constant.lightmap_uv_scale[1] = p_params->uv_offset.y; - } - //find cull variant ShaderData::CullVariant cull_variant; @@ -862,16 +865,16 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList switch (p_params->pass_mode) { case PASS_MODE_COLOR: case PASS_MODE_COLOR_TRANSPARENT: { - if (surf->sort.uses_lightmap) { + if (element_info.uses_lightmap) { shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS; - } else if (surf->sort.uses_forward_gi) { + } else if (element_info.uses_forward_gi) { shader_version = SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI; } else { shader_version = 
SHADER_VERSION_COLOR_PASS; } } break; case PASS_MODE_COLOR_SPECULAR: { - if (surf->sort.uses_lightmap) { + if (element_info.uses_lightmap) { shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR; } else { shader_version = SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR; @@ -913,31 +916,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); } - if (p_params->screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) { - //lod - Vector3 support_min = surf->owner->transformed_aabb.get_support(-p_params->lod_plane.normal); - Vector3 support_max = surf->owner->transformed_aabb.get_support(p_params->lod_plane.normal); - - float distance_min = p_params->lod_plane.distance_to(support_min); - float distance_max = p_params->lod_plane.distance_to(support_max); - - float distance = 0.0; - - if (distance_min * distance_max < 0.0) { - //crossing plane - distance = 0.0; - } else if (distance_min >= 0.0) { - distance = distance_min; - } else if (distance_max <= 0.0) { - distance = -distance_max; - } - - index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, surf->owner->lod_model_scale * surf->owner->lod_bias, distance * p_params->lod_distance_multiplier, p_params->screen_lod_threshold); - - } else { - //no lod - index_array_rd = storage->mesh_surface_get_index_array(mesh_surface); - } + index_array_rd = storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); if (prev_vertex_array_rd != vertex_array_rd) { RD::get_singleton()->draw_list_bind_vertex_array(draw_list, vertex_array_rd); @@ -974,14 +953,11 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList prev_material_uniform_set = material_uniform_set; } - RD::get_singleton()->draw_list_set_push_constant(draw_list, &surf->owner->push_constant, 
sizeof(GeometryInstanceForward::PushConstant)); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant)); - RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), surf->owner->instance_count); - - if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { - surf->owner->push_constant.lightmap_uv_scale[0] = old_offset[0]; - surf->owner->push_constant.lightmap_uv_scale[1] = old_offset[1]; - } + uint32_t instance_count = surf->owner->instance_count > 1 ? surf->owner->instance_count : element_info.repeat; + RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instance_count); + i += element_info.repeat - 1; //skip equal elements } } @@ -1039,16 +1015,16 @@ void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RendererSceneRenderForward::_render_list_thread_function, p_params); - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(p_params->barrier); } else { //single threaded RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(p_params->barrier); } } -void RendererSceneRenderForward::_setup_environment(RID p_environment, 
RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) { +void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows, int p_index) { //CameraMatrix projection = p_cam_projection; //projection.flip_y(); // Vulkan and modern APIs use Y-Down CameraMatrix correction; @@ -1287,22 +1263,120 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren scene_state.ubo.roughness_limiter_amount = screen_space_roughness_limiter_get_amount(); scene_state.ubo.roughness_limiter_limit = screen_space_roughness_limiter_get_limit(); - RD::get_singleton()->buffer_update(scene_state.uniform_buffer, 0, sizeof(SceneState::UBO), &scene_state.ubo); + if (p_index >= (int)scene_state.uniform_buffers.size()) { + uint32_t from = scene_state.uniform_buffers.size(); + scene_state.uniform_buffers.resize(p_index + 1); + render_pass_uniform_sets.resize(p_index + 1); + for (uint32_t i = from; i < scene_state.uniform_buffers.size(); i++) { + scene_state.uniform_buffers[i] = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); + } + } + RD::get_singleton()->buffer_update(scene_state.uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo, RD::BARRIER_MASK_RASTER); +} + +void RendererSceneRenderForward::_update_instance_data_buffer(RenderListType p_render_list) { 
+ if (scene_state.instance_data[p_render_list].size() > 0) { + if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) { + if (scene_state.instance_buffer[p_render_list] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[p_render_list]); + } + uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size())); + scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData)); + scene_state.instance_buffer_size[p_render_list] = new_size; + } + RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr(), RD::BARRIER_MASK_RASTER); + } +} +void RendererSceneRenderForward::_fill_instance_data(RenderListType p_render_list, uint32_t p_offset, int32_t p_max_elements, bool p_update_buffer) { + RenderList *rl = &render_list[p_render_list]; + uint32_t element_total = p_max_elements >= 0 ? 
uint32_t(p_max_elements) : rl->elements.size(); + + scene_state.instance_data[p_render_list].resize(p_offset + element_total); + rl->element_info.resize(p_offset + element_total); + + uint32_t repeats = 0; + GeometryInstanceSurfaceDataCache *prev_surface = nullptr; + for (uint32_t i = 0; i < element_total; i++) { + GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset]; + GeometryInstanceForward *inst = surface->owner; + + SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset]; + + if (inst->store_transform_cache) { + RendererStorageRD::store_transform(inst->transform, instance_data.transform); + } else { + RendererStorageRD::store_transform(Transform(), instance_data.transform); + } + + instance_data.flags = inst->flags_cache; + instance_data.gi_offset = inst->gi_offset_cache; + instance_data.layer_mask = inst->layer_mask; + instance_data.instance_uniforms_ofs = uint32_t(inst->shader_parameters_offset); + instance_data.lightmap_uv_scale[0] = inst->lightmap_uv_scale.position.x; + instance_data.lightmap_uv_scale[1] = inst->lightmap_uv_scale.position.y; + instance_data.lightmap_uv_scale[2] = inst->lightmap_uv_scale.size.x; + instance_data.lightmap_uv_scale[3] = inst->lightmap_uv_scale.size.y; + + bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid(); + + if (prev_surface != nullptr && !cant_repeat && prev_surface->sort.sort_key1 == surface->sort.sort_key1 && prev_surface->sort.sort_key2 == surface->sort.sort_key2) { + //this element is the same as the previous one, count repeats to draw it using instancing + repeats++; + } else { + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + i - j].repeat = j; + } + } + repeats = 1; + } + + RenderElementInfo &element_info = rl->element_info[p_offset + i]; + + element_info.lod_index = surface->sort.lod_index; + element_info.uses_forward_gi = surface->sort.uses_forward_gi; + 
element_info.uses_lightmap = surface->sort.uses_lightmap; + + if (cant_repeat) { + prev_surface = nullptr; + } else { + prev_surface = surface; + } + } + + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + element_total - j].repeat = j; + } + } + + if (p_update_buffer) { + _update_instance_data_buffer(p_render_list); + } } -void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi, bool p_using_opaque_gi) { - scene_state.used_sss = false; - scene_state.used_screen_texture = false; - scene_state.used_normal_texture = false; - scene_state.used_depth_texture = false; +void RendererSceneRenderForward::_fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi, bool p_using_opaque_gi, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_append) { + if (p_render_list == RENDER_LIST_OPAQUE) { + scene_state.used_sss = false; + scene_state.used_screen_texture = false; + scene_state.used_normal_texture = false; + scene_state.used_depth_texture = false; + } + uint32_t lightmap_captures_used = 0; Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); near_plane.d += p_cam_projection.get_z_near(); float z_max = p_cam_projection.get_z_far() - p_cam_projection.get_z_near(); - uint32_t lightmap_captures_used = 0; + RenderList *rl = &render_list[p_render_list]; _update_dirty_geometry_instances(); - render_list.clear(); + + if (!p_append) { + rl->clear(); + if (p_render_list == RENDER_LIST_OPAQUE) { + render_list[RENDER_LIST_ALPHA].clear(); //opaque fills alpha too + } + } //fill list @@ -1318,7 +1392,7 @@ void 
RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst bool uses_lightmap = false; bool uses_gi = false; - if (p_pass_mode == PASS_MODE_COLOR) { + if (p_render_list == RENDER_LIST_OPAQUE) { //setup GI if (inst->lightmap_instance.is_valid()) { @@ -1330,15 +1404,15 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst } } if (lightmap_cull_index >= 0) { - inst->push_constant.gi_offset &= 0xFFFF; - inst->push_constant.gi_offset |= lightmap_cull_index; + inst->gi_offset_cache = inst->lightmap_slice_index << 16; + inst->gi_offset_cache |= lightmap_cull_index; flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP; if (scene_state.lightmap_has_sh[lightmap_cull_index]) { flags |= INSTANCE_DATA_FLAG_USE_SH_LIGHTMAP; } uses_lightmap = true; } else { - inst->push_constant.gi_offset = 0xFFFFFFFF; + inst->gi_offset_cache = 0xFFFFFFFF; } } else if (inst->lightmap_sh) { @@ -1352,7 +1426,7 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst lcd.sh[j * 4 + 3] = src_capture[j].a; } flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE; - inst->push_constant.gi_offset = lightmap_captures_used; + inst->gi_offset_cache = lightmap_captures_used; lightmap_captures_used++; uses_lightmap = true; } @@ -1379,7 +1453,7 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst SWAP(probe0_index, probe1_index); } - inst->push_constant.gi_offset = probe0_index | (probe1_index << 16); + inst->gi_offset_cache = probe0_index | (probe1_index << 16); flags |= INSTANCE_DATA_FLAG_USE_GIPROBE; uses_gi = true; } else { @@ -1387,11 +1461,11 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst flags |= INSTANCE_DATA_FLAG_USE_SDFGI; uses_gi = true; } - inst->push_constant.gi_offset = 0xFFFFFFFF; + inst->gi_offset_cache = 0xFFFFFFFF; } } } - inst->push_constant.flags = flags; + inst->flags_cache = flags; GeometryInstanceSurfaceDataCache *surf = inst->surface_caches; @@ -1399,12 +1473,39 
@@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst surf->sort.uses_forward_gi = 0; surf->sort.uses_lightmap = 0; + // LOD + + if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(surf->surface)) { + //lod + Vector3 lod_support_min = inst->transformed_aabb.get_support(-p_lod_plane.normal); + Vector3 lod_support_max = inst->transformed_aabb.get_support(p_lod_plane.normal); + + float distance_min = p_lod_plane.distance_to(lod_support_min); + float distance_max = p_lod_plane.distance_to(lod_support_max); + + float distance = 0.0; + + if (distance_min * distance_max < 0.0) { + //crossing plane + distance = 0.0; + } else if (distance_min >= 0.0) { + distance = distance_min; + } else if (distance_max <= 0.0) { + distance = -distance_max; + } + + surf->sort.lod_index = storage->mesh_surface_get_lod(surf->surface, inst->lod_model_scale * inst->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold); + } else { + surf->sort.lod_index = 0; + } + + // ADD Element if (p_pass_mode == PASS_MODE_COLOR) { if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { - render_list.add_element(surf); + rl->add_element(surf); } if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA) { - render_list.add_alpha_element(surf); + render_list[RENDER_LIST_ALPHA].add_element(surf); if (uses_gi) { surf->sort.uses_forward_gi = 1; } @@ -1429,11 +1530,11 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst } else if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) { if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW) { - render_list.add_element(surf); + rl->add_element(surf); } } else { if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { - render_list.add_element(surf); + rl->add_element(surf); } } @@ -1443,8 +1544,8 
@@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst } } - if (lightmap_captures_used) { - RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures); + if (p_render_list == RENDER_LIST_OPAQUE && lightmap_captures_used) { + RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures, RD::BARRIER_MASK_RASTER); } } @@ -1473,29 +1574,21 @@ void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<RID> &p_light scene_state.lightmaps_used++; } if (scene_state.lightmaps_used > 0) { - RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps); + RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps, RD::BARRIER_MASK_RASTER); } } -void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { +void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t 
p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { RenderBufferDataForward *render_buffer = nullptr; if (p_render_buffer.is_valid()) { render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer); } //first of all, make a new render pass - render_pass++; - //fill up ubo RENDER_TIMESTAMP("Setup 3D Scene"); - if (p_reflection_probe.is_valid()) { - scene_state.ubo.reflection_multiplier = 0.0; - } else { - scene_state.ubo.reflection_multiplier = 1.0; - } - float lod_distance_multiplier = p_cam_projection.get_lod_multiplier(); Plane lod_camera_plane(p_cam_transform.get_origin(), -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); @@ -1508,7 +1601,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf Vector2 vp_he = p_cam_projection.get_viewport_half_extents(); scene_state.ubo.viewport_size[0] = vp_he.x; scene_state.ubo.viewport_size[1] = vp_he.y; - scene_state.ubo.directional_light_count = p_directional_light_count; + scene_state.ubo.directional_light_count = 0; Size2i screen_size; RID opaque_framebuffer; @@ -1592,13 +1685,21 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf ERR_FAIL(); //bug? 
} + RD::get_singleton()->draw_command_begin_label("Render Setup"); + _setup_lightmaps(p_lightmaps, p_cam_transform); _setup_giprobes(p_gi_probes); _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); _update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example) - _fill_render_list(p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi, using_sdfgi || using_giprobe); + _fill_render_list(RENDER_LIST_OPAQUE, p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi, using_sdfgi || using_giprobe, lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + render_list[RENDER_LIST_OPAQUE].sort_by_key(); + render_list[RENDER_LIST_ALPHA].sort_by_depth(); + _fill_instance_data(RENDER_LIST_OPAQUE); + _fill_instance_data(RENDER_LIST_ALPHA); + + RD::get_singleton()->draw_command_end_label(); bool using_sss = !low_end && render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED; @@ -1682,8 +1783,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf clear_color = p_default_bg_color; } - render_list.sort_by_key(false); - bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION; bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES; bool depth_pre_pass = !low_end && depth_framebuffer.is_valid(); @@ -1691,42 +1790,64 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf bool using_ssao = 
depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment); bool continue_depth = false; if (depth_pre_pass) { //depth pre pass - RENDER_TIMESTAMP("Render Depth Pre-Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + bool needs_pre_resolve = _needs_post_prepass_render(using_sdfgi || using_giprobe); + if (needs_pre_resolve) { + RENDER_TIMESTAMP("GI + Render Depth Pre-Pass (parallel)"); + } else { + RENDER_TIMESTAMP("Render Depth Pre-Pass"); + } + if (needs_pre_resolve) { + //pre clear the depth framebuffer, as AMD (and maybe others?) use compute for it, and barrier other compute shaders. + RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, depth_pass_clear); + RD::get_singleton()->draw_list_end(); + //start compute processes here, so they run at the same time as depth pre-pass + _post_prepass_render(using_sdfgi || using_giprobe); + } - bool finish_depth = using_ssao || using_sdfgi || using_giprobe; - RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); RD::get_singleton()->draw_command_begin_label("Render Depth Pre-Pass"); - _render_list_with_threads(&render_list_params, depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? 
RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + + bool finish_depth = using_ssao || using_sdfgi || using_giprobe; + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, depth_framebuffer, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, needs_pre_resolve ? Vector<Color>() : depth_pass_clear); + RD::get_singleton()->draw_command_end_label(); + + if (needs_pre_resolve) { + _pre_resolve_render(using_sdfgi || using_giprobe); + } + if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { RENDER_TIMESTAMP("Resolve Depth Pre-Pass"); - RD::get_singleton()->draw_command_insert_label("Resolve Depth Pre-Pass"); + RD::get_singleton()->draw_command_begin_label("Resolve Depth Pre-Pass"); if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE) { + if (needs_pre_resolve) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, RD::BARRIER_MASK_COMPUTE); + } static int texture_samples[RS::VIEWPORT_MSAA_MAX] = { 1, 2, 4, 8, 16 }; storage->get_effects()->resolve_gi(render_buffer->depth_msaa, render_buffer->normal_roughness_buffer_msaa, using_giprobe ? 
render_buffer->giprobe_buffer_msaa : RID(), render_buffer->depth, render_buffer->normal_roughness_buffer, using_giprobe ? render_buffer->giprobe_buffer : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_samples[render_buffer->msaa]); } else if (finish_depth) { RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); } + RD::get_singleton()->draw_command_end_label(); } continue_depth = !finish_depth; } - if (using_ssao) { - _process_ssao(p_render_buffer, p_environment, render_buffer->normal_roughness_buffer, p_cam_projection); - } + _pre_opaque_render(using_ssao, using_sdfgi || using_giprobe, render_buffer ? render_buffer->normal_roughness_buffer : RID(), render_buffer ? render_buffer->giprobe_buffer : RID()); - if (using_sdfgi || using_giprobe) { - _process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probes); - } + RD::get_singleton()->draw_command_begin_label("Render Opaque Pass"); + + scene_state.ubo.directional_light_count = _get_render_state_directional_light_count(); _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); RENDER_TIMESTAMP("Render Opaque Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss; 
bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss; @@ -1747,10 +1868,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer; - RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_command_begin_label("Render Opaque Pass"); - _render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); - RD::get_singleton()->draw_command_end_label(); + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? 
RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); if (will_continue_color && using_separate_specular) { // close the specular framebuffer, as it's no longer used RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); @@ -1758,6 +1877,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } } + RD::get_singleton()->draw_command_end_label(); + if (debug_giprobes) { //debug giprobes bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); @@ -1837,32 +1958,44 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf RENDER_TIMESTAMP("Render Transparent Pass"); - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + RD::get_singleton()->draw_command_begin_label("Render Transparent Pass"); - render_list.sort_by_reverse_depth_and_priority(true); + rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_ALPHA, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); + + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); { - RD::get_singleton()->draw_command_begin_label("Render Transparent Pass"); - RenderListParameters render_list_params(&render_list.elements[render_list.max_elements - render_list.alpha_element_count], 
render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); _render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); - RD::get_singleton()->draw_command_end_label(); } + RD::get_singleton()->draw_command_end_label(); + + RD::get_singleton()->draw_command_begin_label("Resolve"); + if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); } -} -void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end) { - RENDER_TIMESTAMP("Setup Rendering Shadow"); + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderForward::_render_shadow_begin() { + scene_state.shadow_passes.clear(); + RD::get_singleton()->draw_command_begin_label("Shadow Setup"); _update_render_base_uniform_set(); - 
render_pass++; + render_list[RENDER_LIST_SECONDARY].clear(); + scene_state.instance_data[RENDER_LIST_SECONDARY].clear(); +} +void RendererSceneRenderForward::_render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end) { + uint32_t shadow_pass_index = scene_state.shadow_passes.size(); + + SceneState::ShadowPass shadow_pass; scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1; - _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), !p_flip_y, Color(), 0, p_zfar, false, p_use_pancake); + _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), !p_flip_y, Color(), 0, p_zfar, false, p_use_pancake, shadow_pass_index); if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { p_screen_lod_threshold = 0.0; @@ -1870,13 +2003,11 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr PassMode pass_mode = p_use_dp ? 
PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW; - _fill_render_list(p_instances, pass_mode, p_projection, p_transform); - - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); - - RENDER_TIMESTAMP("Render Shadow"); - - render_list.sort_by_key(false); + uint32_t render_list_from = render_list[RENDER_LIST_SECONDARY].elements.size(); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_projection, p_transform, false, false, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, true); + uint32_t render_list_size = render_list[RENDER_LIST_SECONDARY].elements.size() - render_list_from; + render_list[RENDER_LIST_SECONDARY].sort_by_key_range(render_list_from, render_list_size); + _fill_instance_data(RENDER_LIST_SECONDARY, render_list_from, render_list_size, false); { //regular forward for now @@ -1884,49 +2015,87 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr if (p_flip_y) { flip_cull = !flip_cull; } - RD::get_singleton()->draw_command_begin_label("Render Shadow"); - RenderListParameters render_list_params(render_list.elements, render_list.element_count, flip_cull, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); - _render_list_with_threads(&render_list_params, p_framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : RD::INITIAL_ACTION_CONTINUE, p_end ? 
RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, Vector<Color>(), 1.0, 0, p_rect); - RD::get_singleton()->draw_command_end_label(); + + shadow_pass.element_from = render_list_from; + shadow_pass.element_count = render_list_size; + shadow_pass.flip_cull = flip_cull; + shadow_pass.pass_mode = pass_mode; + + shadow_pass.rp_uniform_set = RID(); //will be filled later when instance buffer is complete + shadow_pass.camera_plane = p_camera_plane; + shadow_pass.screen_lod_threshold = p_screen_lod_threshold; + shadow_pass.lod_distance_multiplier = p_lod_distance_multiplier; + + shadow_pass.framebuffer = p_framebuffer; + shadow_pass.initial_depth_action = p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION_CONTINUE : RD::INITIAL_ACTION_CONTINUE); + shadow_pass.final_depth_action = p_end ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE; + shadow_pass.rect = p_rect; + + scene_state.shadow_passes.push_back(shadow_pass); } } +void RendererSceneRenderForward::_render_shadow_process() { + _update_instance_data_buffer(RENDER_LIST_SECONDARY); + //render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time) + + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + //render passes need to be configured after instance buffer is done, since they need the latest version + SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i]; + shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>(), false, i); + } + + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderForward::_render_shadow_end(uint32_t p_barrier) { + RD::get_singleton()->draw_command_begin_label("Shadow Render"); + + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + SceneState::ShadowPass 
&shadow_pass = scene_state.shadow_passes[i]; + RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, true, shadow_pass.rp_uniform_set, false, Vector2(), shadow_pass.camera_plane, shadow_pass.lod_distance_multiplier, shadow_pass.screen_lod_threshold, shadow_pass.element_from, RD::BARRIER_MASK_NO_BARRIER); + _render_list_with_threads(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, shadow_pass.final_depth_action, Vector<Color>(), 1.0, 0, shadow_pass.rect); + } + + if (p_barrier != RD::BARRIER_MASK_NO_BARRIER) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, p_barrier); + } + RD::get_singleton()->draw_command_end_label(); +} + void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances) { RENDER_TIMESTAMP("Setup Render Collider Heightfield"); - _update_render_base_uniform_set(); - - render_pass++; + RD::get_singleton()->draw_command_begin_label("Render Collider Heightfield"); + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false); PassMode pass_mode = PASS_MODE_SHADOW; - _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), 
RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Collider Heightfield"); - render_list.sort_by_key(false); - { //regular forward for now - RD::get_singleton()->draw_command_begin_label("Render Collider Heightfield"); - RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, pass_mode, true, rp_uniform_set); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, true, rp_uniform_set); _render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); - RD::get_singleton()->draw_command_end_label(); } + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering Material"); - _update_render_base_uniform_set(); + RD::get_singleton()->draw_command_begin_label("Render Material"); - render_pass++; + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; scene_state.ubo.material_uv2_mode = false; @@ -1934,16 +2103,16 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, 
p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); - render_list.sort_by_key(false); - { - RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set); //regular forward for now Vector<Color> clear; clear.push_back(Color(0, 0, 0, 0)); @@ -1955,14 +2124,16 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); RD::get_singleton()->draw_list_end(); } + + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering UV2"); - _update_render_base_uniform_set(); + RD::get_singleton()->draw_command_begin_label("Render UV2"); - render_pass++; + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; scene_state.ubo.material_uv2_mode = true; @@ -1970,16 +2141,16 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance * _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - 
_fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform()); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); - render_list.sort_by_key(false); - { - RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, true); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, true); //regular forward for now Vector<Color> clear; clear.push_back(Color(0, 0, 0, 0)); @@ -2015,23 +2186,24 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance * RD::get_singleton()->draw_list_end(); } + + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { RENDER_TIMESTAMP("Render SDFGI"); + RD::get_singleton()->draw_command_begin_label("Render SDFGI Voxel"); + _update_render_base_uniform_set(); RenderBufferDataForward *render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); ERR_FAIL_COND(!render_buffer); - render_pass++; - PassMode pass_mode = PASS_MODE_SDF; - _fill_render_list(p_instances, pass_mode, 
CameraMatrix(), Transform()); - render_list.sort_by_key(false); - - RID rp_uniform_set = _setup_sdfgi_render_pass_uniform_set(p_albedo_texture, p_emission_texture, p_emission_aniso_texture, p_geom_facing_texture); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); Vector3 half_extents = p_bounds.size * 0.5; Vector3 center = p_bounds.position + half_extents; @@ -2084,15 +2256,19 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); + RID rp_uniform_set = _setup_sdfgi_render_pass_uniform_set(p_albedo_texture, p_emission_texture, p_emission_aniso_texture, p_geom_facing_texture); + Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size); if (!E) { RID fb = RD::get_singleton()->framebuffer_create_empty(fb_size); E = sdfgi_framebuffer_size_cache.insert(fb_size, fb); } - RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, false); _render_list_with_threads(&render_list_params, E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs); } + + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_base_uniforms_changed() { @@ -2144,21 +2320,13 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { RD::Uniform u; u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - 
u.ids.push_back(scene_state.uniform_buffer); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_omni_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 5; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_spot_light_buffer()); uniforms.push_back(u); @@ -2166,35 +2334,35 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { RD::Uniform u; - u.binding = 6; + u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_reflection_probe_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 7; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(get_directional_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 8; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(scene_state.lightmap_buffer); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 9; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(scene_state.lightmap_capture_buffer); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 10; + u.binding = 9; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID decal_atlas = storage->decal_atlas_get_texture(); u.ids.push_back(decal_atlas); @@ -2202,7 +2370,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } { RD::Uniform u; - u.binding = 11; + u.binding = 10; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID decal_atlas = storage->decal_atlas_get_texture_srgb(); u.ids.push_back(decal_atlas); @@ -2210,7 +2378,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } { RD::Uniform u; - u.binding = 12; + u.binding = 11; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_decal_buffer()); uniforms.push_back(u); @@ -2219,7 +2387,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { 
RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 13; + u.binding = 12; u.ids.push_back(storage->global_variables_get_storage_buffer()); uniforms.push_back(u); } @@ -2227,7 +2395,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { if (!low_end) { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 14; + u.binding = 13; u.ids.push_back(sdfgi_get_ubo()); uniforms.push_back(u); } @@ -2236,10 +2404,9 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } } -RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas) { - if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { - RD::get_singleton()->free(render_pass_uniform_set); - } +RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas, int p_index) { + //there should always be enough uniform buffers for render passes, otherwise bugs + ERR_FAIL_INDEX_V(p_index, (int)scene_state.uniform_buffers.size(), RID()); RenderBufferDataForward *rb = nullptr; if (p_render_buffers.is_valid()) { @@ -2251,6 +2418,24 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff Vector<RD::Uniform> uniforms; { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[p_index]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID instance_buffer = 
scene_state.instance_buffer[p_render_list]; + if (instance_buffer == RID()) { + instance_buffer = default_vec4_xform_buffer; // any buffer will do since its not used + } + u.ids.push_back(instance_buffer); + uniforms.push_back(u); + } + { RID radiance_texture; if (p_radiance_texture.is_valid()) { radiance_texture = p_radiance_texture; @@ -2258,7 +2443,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); } RD::Uniform u; - u.binding = 0; + u.binding = 2; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(radiance_texture); uniforms.push_back(u); @@ -2267,7 +2452,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RID ref_texture = p_reflection_atlas.is_valid() ? reflection_atlas_get_texture(p_reflection_atlas) : RID(); RD::Uniform u; - u.binding = 1; + u.binding = 3; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (ref_texture.is_valid()) { u.ids.push_back(ref_texture); @@ -2279,7 +2464,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 2; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture; if (p_shadow_atlas.is_valid()) { @@ -2293,7 +2478,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 3; + u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (p_use_directional_shadow_atlas && directional_shadow_get_texture().is_valid()) { u.ids.push_back(directional_shadow_get_texture()); @@ -2304,7 +2489,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 4; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(scene_state.max_lightmaps); RID default_tex = 
storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); @@ -2323,7 +2508,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 5; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(MAX_GI_PROBES); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); @@ -2344,7 +2529,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 6; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; RID cb = p_cluster_buffer.is_valid() ? p_cluster_buffer : default_vec4_xform_buffer; u.ids.push_back(cb); @@ -2353,7 +2538,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 7; + u.binding = 9; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2361,7 +2546,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 8; + u.binding = 10; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID bbt = rb ? render_buffers_get_back_buffer_texture(p_render_buffers) : RID(); RID texture = bbt.is_valid() ? bbt : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2372,7 +2557,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff if (!low_end) { { RD::Uniform u; - u.binding = 9; + u.binding = 11; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = rb && rb->normal_roughness_buffer.is_valid() ? 
rb->normal_roughness_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_NORMAL); u.ids.push_back(texture); @@ -2381,7 +2566,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 10; + u.binding = 12; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID aot = rb ? render_buffers_get_ao_texture(p_render_buffers) : RID(); RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2391,7 +2576,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 11; + u.binding = 13; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID ambient_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_ambient_texture(p_render_buffers) : RID(); RID texture = ambient_buffer.is_valid() ? ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2401,7 +2586,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 12; + u.binding = 14; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID reflection_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_reflection_texture(p_render_buffers) : RID(); RID texture = reflection_buffer.is_valid() ? 
reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2410,7 +2595,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 13; + u.binding = 15; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID t; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { @@ -2423,7 +2608,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 14; + u.binding = 16; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers)); @@ -2434,14 +2619,14 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 15; + u.binding = 17; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 16; + u.binding = 18; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID vfog = RID(); if (rb && render_buffers_has_volumetric_fog(p_render_buffers)) { @@ -2457,8 +2642,16 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } } - render_pass_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); - return render_pass_uniform_set; + if (p_index >= (int)render_pass_uniform_sets.size()) { + render_pass_uniform_sets.resize(p_index + 1); + } + + if (render_pass_uniform_sets[p_index].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[p_index])) { + RD::get_singleton()->free(render_pass_uniform_sets[p_index]); + } + + render_pass_uniform_sets[p_index] = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); + return 
render_pass_uniform_sets[p_index]; } RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture) { @@ -2469,10 +2662,24 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed Vector<RD::Uniform> uniforms; { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(scene_state.instance_buffer[RENDER_LIST_SECONDARY]); + uniforms.push_back(u); + } + { // No radiance texture. RID radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); RD::Uniform u; - u.binding = 0; + u.binding = 2; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(radiance_texture); uniforms.push_back(u); @@ -2482,7 +2689,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed // No reflection atlas. RID ref_texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK); RD::Uniform u; - u.binding = 1; + u.binding = 3; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(ref_texture); uniforms.push_back(u); @@ -2491,7 +2698,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No shadow atlas. RD::Uniform u; - u.binding = 2; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2501,7 +2708,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No directional shadow atlas. 
RD::Uniform u; - u.binding = 3; + u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2511,7 +2718,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No Lightmaps RD::Uniform u; - u.binding = 4; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(scene_state.max_lightmaps); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); @@ -2525,7 +2732,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No GIProbes RD::Uniform u; - u.binding = 5; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(MAX_GI_PROBES); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); @@ -2538,7 +2745,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { RD::Uniform u; - u.binding = 6; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; RID cb = default_vec4_xform_buffer; u.ids.push_back(cb); @@ -2550,28 +2757,28 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 7; + u.binding = 9; u.ids.push_back(p_albedo_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 8; + u.binding = 10; u.ids.push_back(p_emission_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 9; + u.binding = 11; u.ids.push_back(p_emission_aniso_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 10; + u.binding = 12; u.ids.push_back(p_geom_facing_texture); uniforms.push_back(u); } @@ -2709,10 +2916,11 @@ void 
RendererSceneRenderForward::_geometry_instance_add_surface_with_material(Ge sdcache->sort.sort_key1 = 0; sdcache->sort.sort_key2 = 0; - sdcache->sort.surface_type = ginstance->data->base_type; - sdcache->sort.material_id = p_material_id; + sdcache->sort.surface_index = p_surface; + sdcache->sort.material_id_low = p_material_id & 0x3FFF; + sdcache->sort.material_id_hi = p_material_id >> 14; sdcache->sort.shader_id = p_shader_id; - sdcache->sort.geometry_id = p_mesh.get_local_index(); + sdcache->sort.geometry_id = p_mesh.get_local_index(); //only meshes can repeat anyway sdcache->sort.uses_forward_gi = ginstance->can_sdfgi; sdcache->sort.priority = p_material->priority; } @@ -2842,11 +3050,6 @@ void RendererSceneRenderForward::_geometry_instance_update(GeometryInstance *p_g //Fill push constant - ginstance->push_constant.instance_uniforms_ofs = ginstance->data->shader_parameters_offset >= 0 ? ginstance->data->shader_parameters_offset : 0; - ginstance->push_constant.layer_mask = ginstance->data->layer_mask; - ginstance->push_constant.flags = 0; - ginstance->push_constant.gi_offset = 0xFFFFFFFF; //disabled - bool store_transform = true; if (ginstance->data->base_type == RS::INSTANCE_MULTIMESH) { @@ -2903,21 +3106,10 @@ void RendererSceneRenderForward::_geometry_instance_update(GeometryInstance *p_g } } - if (store_transform) { - RendererStorageRD::store_transform(ginstance->data->transform, ginstance->push_constant.transform); - } else { - RendererStorageRD::store_transform(Transform(), ginstance->push_constant.transform); - } - + ginstance->store_transform_cache = store_transform; ginstance->can_sdfgi = false; - if (lightmap_instance_is_valid(ginstance->lightmap_instance)) { - ginstance->push_constant.gi_offset = ginstance->data->lightmap_slice_index << 16; - ginstance->push_constant.lightmap_uv_scale[0] = ginstance->data->lightmap_uv_scale.position.x; - ginstance->push_constant.lightmap_uv_scale[1] = ginstance->data->lightmap_uv_scale.position.y; - 
ginstance->push_constant.lightmap_uv_scale[2] = ginstance->data->lightmap_uv_scale.size.width; - ginstance->push_constant.lightmap_uv_scale[3] = ginstance->data->lightmap_uv_scale.size.height; - } else if (!low_end) { + if (!lightmap_instance_is_valid(ginstance->lightmap_instance) && !low_end) { if (ginstance->gi_probes[0].is_null() && (ginstance->data->use_baked_light || ginstance->data->use_dynamic_gi)) { ginstance->can_sdfgi = true; } @@ -3007,8 +3199,7 @@ void RendererSceneRenderForward::geometry_instance_set_mesh_instance(GeometryIns void RendererSceneRenderForward::geometry_instance_set_transform(GeometryInstance *p_geometry_instance, const Transform &p_transform, const AABB &p_aabb, const AABB &p_transformed_aabb) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); - RendererStorageRD::store_transform(p_transform, ginstance->push_constant.transform); - ginstance->data->transform = p_transform; + ginstance->transform = p_transform; ginstance->mirror = p_transform.basis.determinant() < 0; ginstance->data->aabb = p_aabb; ginstance->transformed_aabb = p_transformed_aabb; @@ -3043,8 +3234,8 @@ void RendererSceneRenderForward::geometry_instance_set_use_lightmap(GeometryInst GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); ginstance->lightmap_instance = p_lightmap_instance; - ginstance->data->lightmap_uv_scale = p_lightmap_uv_scale; - ginstance->data->lightmap_slice_index = p_lightmap_slice_index; + ginstance->lightmap_uv_scale = p_lightmap_uv_scale; + ginstance->lightmap_slice_index = p_lightmap_slice_index; _geometry_instance_mark_dirty(ginstance); } void RendererSceneRenderForward::geometry_instance_set_lightmap_capture(GeometryInstance *p_geometry_instance, const Color *p_sh9) { @@ -3067,7 +3258,7 @@ void RendererSceneRenderForward::geometry_instance_set_lightmap_capture(Geometry void 
RendererSceneRenderForward::geometry_instance_set_instance_shader_parameters_offset(GeometryInstance *p_geometry_instance, int32_t p_offset) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); - ginstance->data->shader_parameters_offset = p_offset; + ginstance->shader_parameters_offset = p_offset; _geometry_instance_mark_dirty(ginstance); } void RendererSceneRenderForward::geometry_instance_set_cast_double_sided_shadows(GeometryInstance *p_geometry_instance, bool p_enable) { @@ -3081,8 +3272,7 @@ void RendererSceneRenderForward::geometry_instance_set_cast_double_sided_shadows void RendererSceneRenderForward::geometry_instance_set_layer_mask(GeometryInstance *p_geometry_instance, uint32_t p_layer_mask) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); - ginstance->data->layer_mask = p_layer_mask; - ginstance->push_constant.layer_mask = p_layer_mask; + ginstance->layer_mask = p_layer_mask; } void RendererSceneRenderForward::geometry_instance_free(GeometryInstance *p_geometry_instance) { @@ -3114,7 +3304,7 @@ void RendererSceneRenderForward::geometry_instance_pair_decal_instances(Geometry Transform RendererSceneRenderForward::geometry_instance_get_transform(GeometryInstance *p_instance) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); ERR_FAIL_COND_V(!ginstance, Transform()); - return ginstance->data->transform; + return ginstance->transform; } AABB RendererSceneRenderForward::geometry_instance_get_aabb(GeometryInstance *p_instance) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); @@ -3382,13 +3572,6 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor shader.compiler.initialize(actions); } - //render list - render_list.max_elements = GLOBAL_DEF_RST("rendering/limits/rendering/max_renderable_elements", 
(int)128000); - render_list.init(); - render_pass = 0; - - scene_state.uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); - { //default material and shader default_shader = storage->shader_create(); @@ -3442,8 +3625,10 @@ RendererSceneRenderForward::~RendererSceneRenderForward() { directional_shadow_atlas_set_size(0); //clear base uniform set if still valid - if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { - RD::get_singleton()->free(render_pass_uniform_set); + for (uint32_t i = 0; i < render_pass_uniform_sets.size(); i++) { + if (render_pass_uniform_sets[i].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[i])) { + RD::get_singleton()->free(render_pass_uniform_sets[i]); + } } if (sdfgi_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(sdfgi_pass_uniform_set)) { @@ -3462,9 +3647,16 @@ RendererSceneRenderForward::~RendererSceneRenderForward() { storage->free(default_material); { - RD::get_singleton()->free(scene_state.uniform_buffer); + for (uint32_t i = 0; i < scene_state.uniform_buffers.size(); i++) { + RD::get_singleton()->free(scene_state.uniform_buffers[i]); + } RD::get_singleton()->free(scene_state.lightmap_buffer); RD::get_singleton()->free(scene_state.lightmap_capture_buffer); + for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) { + if (scene_state.instance_buffer[i] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[i]); + } + } memdelete_arr(scene_state.lightmap_captures); } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h index 0b57c7f76c..af78c50fda 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h @@ -50,6 +50,15 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { MAX_GI_PROBES = 8, 
MAX_LIGHTMAPS = 8, MAX_GI_PROBES_PER_INSTANCE = 2, + INSTANCE_DATA_BUFFER_MIN_SIZE = 4096 + }; + + enum RenderListType { + RENDER_LIST_OPAQUE, //used for opaque objects + RENDER_LIST_ALPHA, //used for transparent objects + RENDER_LIST_SECONDARY, //used for shadows and other objects + RENDER_LIST_MAX + }; /* Scene Shader */ @@ -245,7 +254,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RID shadow_sampler; RID render_base_uniform_set; - RID render_pass_uniform_set; + LocalVector<RID> render_pass_uniform_sets; RID sdfgi_pass_uniform_set; uint64_t lightmap_texture_array_version = 0xFFFFFFFF; @@ -257,7 +266,58 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { void _update_render_base_uniform_set(); RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture); - RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false); + RID _setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false, int p_index = 0); + + enum PassMode { + PASS_MODE_COLOR, + PASS_MODE_COLOR_SPECULAR, + PASS_MODE_COLOR_TRANSPARENT, + PASS_MODE_SHADOW, + PASS_MODE_SHADOW_DP, + PASS_MODE_DEPTH, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE, + PASS_MODE_DEPTH_MATERIAL, + PASS_MODE_SDF, + }; + + struct GeometryInstanceSurfaceDataCache; + struct RenderElementInfo; + + struct RenderListParameters { + GeometryInstanceSurfaceDataCache **elements = nullptr; + RenderElementInfo *element_info = nullptr; + int element_count = 0; + 
bool reverse_cull = false; + PassMode pass_mode = PASS_MODE_COLOR; + bool no_gi = false; + RID render_pass_uniform_set; + bool force_wireframe = false; + Vector2 uv_offset; + Plane lod_plane; + float lod_distance_multiplier = 0.0; + float screen_lod_threshold = 0.0; + RD::FramebufferFormatID framebuffer_format = 0; + uint32_t element_offset = 0; + uint32_t barrier = RD::BARRIER_MASK_ALL; + + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, uint32_t p_element_offset = 0, uint32_t p_barrier = RD::BARRIER_MASK_ALL) { + elements = p_elements; + element_info = p_element_info; + element_count = p_element_count; + reverse_cull = p_reverse_cull; + pass_mode = p_pass_mode; + no_gi = p_no_gi; + render_pass_uniform_set = p_render_pass_uniform_set; + force_wireframe = p_force_wireframe; + uv_offset = p_uv_offset; + lod_plane = p_lod_plane; + lod_distance_multiplier = p_lod_distance_multiplier; + screen_lod_threshold = p_screen_lod_threshold; + element_offset = p_element_offset; + barrier = p_barrier; + } + }; struct LightmapData { float normal_xform[12]; @@ -367,9 +427,24 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { uint32_t pancake_shadows; }; + struct PushConstant { + uint32_t base_index; // + uint32_t uv_offset; //packed + uint32_t pad[2]; + }; + + struct InstanceData { + float transform[16]; + uint32_t flags; + uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables + uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint32_t layer_mask; + float lightmap_uv_scale[4]; + }; + UBO ubo; - RID uniform_buffer; + LocalVector<RID> uniform_buffers; 
LightmapData lightmaps[MAX_LIGHTMAPS]; RID lightmap_ids[MAX_LIGHTMAPS]; @@ -378,6 +453,10 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { uint32_t max_lightmaps; RID lightmap_buffer; + RID instance_buffer[RENDER_LIST_MAX]; + uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 }; + LocalVector<InstanceData> instance_data[RENDER_LIST_MAX]; + LightmapCaptureData *lightmap_captures; uint32_t max_lightmap_captures; RID lightmap_capture_buffer; @@ -390,10 +469,29 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { bool used_depth_texture = false; bool used_sss = false; + struct ShadowPass { + uint32_t element_from; + uint32_t element_count; + bool flip_cull; + PassMode pass_mode; + + RID rp_uniform_set; + Plane camera_plane; + float lod_distance_multiplier; + float screen_lod_threshold; + + RID framebuffer; + RD::InitialAction initial_depth_action; + RD::FinalAction final_depth_action; + Rect2i rect; + }; + + LocalVector<ShadowPass> shadow_passes; + } scene_state; static RendererSceneRenderForward *singleton; - uint64_t render_pass; + double time; RID default_shader; RID default_material; @@ -407,51 +505,15 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RID default_vec4_xform_buffer; RID default_vec4_xform_uniform_set; - enum PassMode { - PASS_MODE_COLOR, - PASS_MODE_COLOR_SPECULAR, - PASS_MODE_COLOR_TRANSPARENT, - PASS_MODE_SHADOW, - PASS_MODE_SHADOW_DP, - PASS_MODE_DEPTH, - PASS_MODE_DEPTH_NORMAL_ROUGHNESS, - PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE, - PASS_MODE_DEPTH_MATERIAL, - PASS_MODE_SDF, - }; - - void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool 
p_pancake_shadows = false); + void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0); void _setup_giprobes(const PagedArray<RID> &p_giprobes); void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform); - struct GeometryInstanceSurfaceDataCache; - - struct RenderListParameters { - GeometryInstanceSurfaceDataCache **elements = nullptr; - int element_count = 0; - bool reverse_cull = false; - PassMode pass_mode = PASS_MODE_COLOR; - bool no_gi = false; - RID render_pass_uniform_set; - bool force_wireframe = false; - Vector2 uv_offset; - Plane lod_plane; - float lod_distance_multiplier = 0.0; - float screen_lod_threshold = 0.0; - RD::FramebufferFormatID framebuffer_format = 0; - RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) { - elements = p_elements; - element_count = p_element_count; - reverse_cull = p_reverse_cull; - pass_mode = p_pass_mode; - no_gi = p_no_gi; - render_pass_uniform_set = p_render_pass_uniform_set; - force_wireframe = p_force_wireframe; - uv_offset = p_uv_offset; - lod_plane = p_lod_plane; - lod_distance_multiplier = p_lod_distance_multiplier; - screen_lod_threshold = p_screen_lod_threshold; - } + struct RenderElementInfo { + uint32_t repeat : 22; + uint32_t uses_lightmap : 1; + uint32_t uses_forward_gi : 1; + uint32_t 
lod_index : 8; }; template <PassMode p_pass_mode> @@ -465,7 +527,9 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { uint32_t render_list_thread_threshold = 500; - void _fill_render_list(const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false); + void _update_instance_data_buffer(RenderListType p_render_list); + void _fill_instance_data(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true); + void _fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, const Plane &p_lod_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, bool p_append = false); Map<Size2i, RID> sdfgi_framebuffer_size_cache; @@ -493,14 +557,17 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { union { struct { - uint32_t geometry_id; - uint32_t material_id; - uint32_t shader_id; - uint32_t surface_type : 4; - uint32_t uses_forward_gi : 1; //set during addition - uint32_t uses_lightmap : 1; //set during addition - uint32_t depth_layer : 4; //set during addition - uint32_t priority : 8; + uint64_t lod_index : 8; + uint64_t surface_index : 10; + uint64_t geometry_id : 32; + uint64_t material_id_low : 14; + + uint64_t material_id_hi : 18; + uint64_t shader_id : 32; + uint64_t uses_forward_gi : 1; + uint64_t uses_lightmap : 1; + uint64_t depth_layer : 4; + uint64_t priority : 8; }; struct { uint64_t sort_key1; @@ -532,20 +599,20 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { float lod_model_scale = 1.0; AABB transformed_aabb; //needed for LOD float depth = 0; - struct PushConstant { - float transform[16]; 
- uint32_t flags; - uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables - uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint32_t layer_mask; - float lightmap_uv_scale[4]; - } push_constant; + uint32_t gi_offset_cache = 0; + uint32_t flags_cache = 0; + bool store_transform_cache = true; + int32_t shader_parameters_offset = -1; + uint32_t lightmap_slice_index; + Rect2 lightmap_uv_scale; + uint32_t layer_mask = 1; RID transforms_uniform_set; uint32_t instance_count = 0; RID mesh_instance; bool can_sdfgi = false; //used during setup uint32_t base_flags = 0; + Transform transform; RID gi_probes[MAX_GI_PROBES_PER_INSTANCE]; RID lightmap_instance; GeometryInstanceLightmapSH *lightmap_sh = nullptr; @@ -558,21 +625,14 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RS::InstanceType base_type; RID skeleton; - - uint32_t layer_mask = 1; - Vector<RID> surface_materials; RID material_override; - Transform transform; AABB aabb; - int32_t shader_parameters_offset = -1; bool use_dynamic_gi = false; bool use_baked_light = false; bool cast_double_sided_shaodows = false; bool mirror = false; - Rect2 lightmap_uv_scale; - uint32_t lightmap_slice_index = 0; bool dirty_dependencies = false; RendererStorage::DependencyTracker dependency_tracker; @@ -604,16 +664,12 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { /* Render List */ struct RenderList { - int max_elements; - - GeometryInstanceSurfaceDataCache **elements = nullptr; - - int element_count; - int alpha_element_count; + LocalVector<GeometryInstanceSurfaceDataCache *> elements; + LocalVector<RenderElementInfo> element_info; void clear() { - element_count = 0; - alpha_element_count = 0; + elements.clear(); + element_info.clear(); } //should eventually be replaced by radix @@ -624,13 +680,14 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { } }; - void sort_by_key(bool p_alpha) { + void sort_by_key() 
{ SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + sorter.sort(elements.ptr(), elements.size()); + } + + void sort_by_key_range(uint32_t p_from, uint32_t p_size) { + SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; + sorter.sort(elements.ptr() + p_from, p_size); } struct SortByDepth { @@ -639,14 +696,10 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { } }; - void sort_by_depth(bool p_alpha) { //used for shadows + void sort_by_depth() { //used for shadows SortArray<GeometryInstanceSurfaceDataCache *, SortByDepth> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + sorter.sort(elements.ptr(), elements.size()); } struct SortByReverseDepthAndPriority { @@ -658,50 +711,24 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { void sort_by_reverse_depth_and_priority(bool p_alpha) { //used for alpha SortArray<GeometryInstanceSurfaceDataCache *, SortByReverseDepthAndPriority> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + sorter.sort(elements.ptr(), elements.size()); } _FORCE_INLINE_ void add_element(GeometryInstanceSurfaceDataCache *p_element) { - if (element_count + alpha_element_count >= max_elements) { - return; - } - elements[element_count] = p_element; - element_count++; - } - - _FORCE_INLINE_ void add_alpha_element(GeometryInstanceSurfaceDataCache *p_element) { - if (element_count + alpha_element_count >= max_elements) { - return; - } - int idx = max_elements - alpha_element_count - 1; - elements[idx] = p_element; - alpha_element_count++; - } - - void init() { - element_count = 0; - alpha_element_count = 0; - elements = 
memnew_arr(GeometryInstanceSurfaceDataCache *, max_elements); - } - - RenderList() { - max_elements = 0; - } - - ~RenderList() { - memdelete_arr(elements); + elements.push_back(p_element); } }; - RenderList render_list; + RenderList render_list[RENDER_LIST_MAX]; protected: - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); - virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true); + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); + + virtual void _render_shadow_begin(); + virtual void _render_shadow_append(RID p_framebuffer, const 
PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true); + virtual void _render_shadow_process(); + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL); + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index d5c9ccd956..3035124cca 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -1148,162 +1148,71 @@ void RendererSceneRenderRD::_sdfgi_update_cascades(RID p_render_buffers) { cascade_data[i].pad = 0; } - RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data); + RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, RD::BARRIER_MASK_COMPUTE); } -void RendererSceneRenderRD::sdfgi_update_probes(RID 
p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) { +void RendererSceneRenderRD::_sdfgi_update_light(RID p_render_buffers, RID p_environment) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); if (rb->sdfgi == nullptr) { return; } - Environment *env = environment_owner.getornull(p_environment); - - RENDER_TIMESTAMP(">SDFGI Update Probes"); - - /* Update Cascades UBO */ - _sdfgi_update_cascades(p_render_buffers); - /* Update Dynamic Lights Buffer */ - - RENDER_TIMESTAMP("Update Lights"); - /* Update dynamic lights */ + RD::get_singleton()->draw_command_begin_label("SDFGI Update dynamic Light"); - { - int32_t cascade_light_count[SDFGI::MAX_CASCADES]; - - for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { - SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; - - SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; - uint32_t idx = 0; - for (uint32_t j = 0; j < (uint32_t)p_directional_lights.size(); j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - LightInstance *li = light_instance_owner.getornull(p_directional_lights[j]); - ERR_CONTINUE(!li); + /* Update dynamic light */ - if (storage->light_directional_is_sky_only(li->light)) { - continue; - } - - Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); - dir.y *= rb->sdfgi->y_mult; - dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Color color = storage->light_get_color(li->light); - color = color.to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = RS::LIGHT_DIRECTIONAL; - lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); - lights[idx].has_shadow = storage->light_has_shadow(li->light); - - idx++; - } - - AABB cascade_aabb; - 
cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; - cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; - - for (uint32_t j = 0; j < p_positional_light_count; j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - LightInstance *li = light_instance_owner.getornull(p_positional_light_instances[j]); - ERR_CONTINUE(!li); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); - uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); - if (i > max_sdfgi_cascade) { - continue; - } + SDGIShader::DirectLightPushConstant push_constant; - if (!cascade_aabb.intersects(li->aabb)) { - continue; - } + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.multibounce = rb->sdfgi->uses_multibounce; + push_constant.y_mult = rb->sdfgi->y_mult; - Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); - //faster to not do this here - //dir.y *= rb->sdfgi->y_mult; - //dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Vector3 pos = li->transform.origin; - pos.y *= rb->sdfgi->y_mult; - lights[idx].position[0] = pos.x; - lights[idx].position[1] = pos.y; - lights[idx].position[2] = pos.z; - Color color = storage->light_get_color(li->light); - color = color.to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = storage->light_get_type(li->light); - lights[idx].energy = 
storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); - lights[idx].has_shadow = storage->light_has_shadow(li->light); - lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); - lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); - lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); - lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + push_constant.light_count = rb->sdfgi->cascade_dynamic_light_count[i]; + push_constant.cascade = i; - idx++; - } + if (rb->sdfgi->cascades[i].all_dynamic_lights_dirty || sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) { + push_constant.process_offset = 0; + push_constant.process_increment = 1; + } else { + static uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = { + 1, 2, 4, 8, 16 + }; - if (idx > 0) { - RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights); - } + uint32_t frames_to_update = frames_to_update_table[sdfgi_frames_to_update_light]; - cascade_light_count[i] = idx; + push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update; + push_constant.process_increment = frames_to_update; } + rb->sdfgi->cascades[i].all_dynamic_lights_dirty = false; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); - - SDGIShader::DirectLightPushConstant push_constant; - - push_constant.grid_size[0] = rb->sdfgi->cascade_size; - push_constant.grid_size[1] = rb->sdfgi->cascade_size; - push_constant.grid_size[2] = rb->sdfgi->cascade_size; - push_constant.max_cascades = 
rb->sdfgi->cascades.size(); - push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; - push_constant.multibounce = rb->sdfgi->uses_multibounce; - push_constant.y_mult = rb->sdfgi->y_mult; - - for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { - SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; - push_constant.light_count = cascade_light_count[i]; - push_constant.cascade = i; - - if (rb->sdfgi->cascades[i].all_dynamic_lights_dirty || sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) { - push_constant.process_offset = 0; - push_constant.process_increment = 1; - } else { - static uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = { - 1, 2, 4, 8, 16 - }; - - uint32_t frames_to_update = frames_to_update_table[sdfgi_frames_to_update_light]; - - push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update; - push_constant.process_increment = frames_to_update; - } - rb->sdfgi->cascades[i].all_dynamic_lights_dirty = false; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DirectLightPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); + } + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_end_label(); +} - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DirectLightPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); - } - RD::get_singleton()->compute_list_end(); +void RendererSceneRenderRD::_sdfgi_update_probes(RID p_render_buffers, RID p_environment) { + RenderBuffers *rb = 
render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; } - RENDER_TIMESTAMP("Raytrace"); + RD::get_singleton()->draw_command_begin_label("SDFGI Update Probes"); + + Environment *env = environment_owner.getornull(p_environment); SDGIShader::IntegratePushConstant push_constant; push_constant.grid_size[1] = rb->sdfgi->cascade_size; @@ -1372,7 +1281,7 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi rb->sdfgi->render_pass++; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_PROCESS]); int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; @@ -1386,14 +1295,47 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); + } + + //end later after raster to avoid barriering on layout changes + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderRD::_sdfgi_store_probes(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; } - 
RD::get_singleton()->compute_list_add_barrier(compute_list); //wait until done + RD::get_singleton()->barrier(RD::BARRIER_MASK_COMPUTE, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_begin_label("SDFGI Store Probes"); + + SDGIShader::IntegratePushConstant push_constant; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.history_index = rb->sdfgi->render_pass % rb->sdfgi->history_size; + push_constant.history_size = rb->sdfgi->history_size; + static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; + push_constant.ray_count = ray_count[sdfgi_ray_count]; + push_constant.ray_bias = rb->sdfgi->probe_bias; + push_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + push_constant.image_size[1] = rb->sdfgi->probe_axis_count; + push_constant.store_ambient_texture = false; + + push_constant.sky_mode = 0; + push_constant.y_mult = rb->sdfgi->y_mult; // Then store values into the lightprobe texture. 
Separating these steps has a small performance hit, but it allows for multiple bounces RENDER_TIMESTAMP("Average Probes"); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_STORE]); //convert to octahedral to store @@ -1403,20 +1345,22 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { push_constant.cascade = i; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[i].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); } - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); - RENDER_TIMESTAMP("<SDFGI Update Probes"); + RD::get_singleton()->draw_command_end_label(); } - void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used) { r_gi_probes_used = 0; RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); + RD::get_singleton()->draw_command_begin_label("GIProbes Setup"); + RID gi_probe_buffer = 
render_buffers_get_gi_probe_buffer(p_render_buffers); GI::GIProbeData gi_probe_data[RenderBuffers::MAX_GIPROBES]; @@ -1500,80 +1444,25 @@ void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transfor } if (p_gi_probes.size() > 0) { - RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data); + RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data, RD::BARRIER_MASK_COMPUTE); } -} -void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { - RENDER_TIMESTAMP("Render GI"); + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderRD::_pre_process_gi(RID p_render_buffers, const Transform &p_transform) { + // Do the required buffer transfers and setup before the depth-pre pass, this way GI can + // run in parallel during depth-pre pass and shadow rendering. 
RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); - Environment *env = environment_owner.getornull(p_environment); - - if (rb->ambient_buffer.is_null() || rb->using_half_size_gi != gi.half_resolution) { - if (rb->ambient_buffer.is_valid()) { - RD::get_singleton()->free(rb->ambient_buffer); - RD::get_singleton()->free(rb->reflection_buffer); - } - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf.width = rb->width; - tf.height = rb->height; - if (gi.half_resolution) { - tf.width >>= 1; - tf.height >>= 1; - } - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->using_half_size_gi = gi.half_resolution; - - _render_buffers_uniform_set_changed(p_render_buffers); - } - - GI::PushConstant push_constant; - - push_constant.screen_size[0] = rb->width; - push_constant.screen_size[1] = rb->height; - push_constant.z_near = p_projection.get_z_near(); - push_constant.z_far = p_projection.get_z_far(); - push_constant.orthogonal = p_projection.is_orthogonal(); - push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); - push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); - push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); - push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; - - bool use_sdfgi = rb->sdfgi != nullptr; - bool use_giprobes = push_constant.max_giprobes > 0; + /* Update Cascades UBO */ - if (env) { - push_constant.ao_color[0] = env->ao_color.r; - push_constant.ao_color[1] = env->ao_color.g; 
- push_constant.ao_color[2] = env->ao_color.b; - } else { - push_constant.ao_color[0] = 0; - push_constant.ao_color[1] = 0; - push_constant.ao_color[2] = 0; - } + if (rb->sdfgi) { + /* Update general SDFGI Buffer */ - push_constant.cam_rotation[0] = p_transform.basis[0][0]; - push_constant.cam_rotation[1] = p_transform.basis[1][0]; - push_constant.cam_rotation[2] = p_transform.basis[2][0]; - push_constant.cam_rotation[3] = 0; - push_constant.cam_rotation[4] = p_transform.basis[0][1]; - push_constant.cam_rotation[5] = p_transform.basis[1][1]; - push_constant.cam_rotation[6] = p_transform.basis[2][1]; - push_constant.cam_rotation[7] = 0; - push_constant.cam_rotation[8] = p_transform.basis[0][2]; - push_constant.cam_rotation[9] = p_transform.basis[1][2]; - push_constant.cam_rotation[10] = p_transform.basis[2][2]; - push_constant.cam_rotation[11] = 0; + _sdfgi_update_cascades(p_render_buffers); - if (rb->sdfgi) { GI::SDFGIData sdfgi_data; sdfgi_data.grid_size[0] = rb->sdfgi->cascade_size; @@ -1640,9 +1529,172 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough c.to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; } - RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data); + RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data, RD::BARRIER_MASK_COMPUTE); + + /* Update dynamic lights in SDFGI cascades */ + + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + + SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; + uint32_t idx = 0; + for (uint32_t j = 0; j < (uint32_t)render_state.sdfgi_update_data->directional_lights->size(); j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->directional_lights->get(j)); + ERR_CONTINUE(!li); + + if (storage->light_directional_is_sky_only(li->light)) { + continue; + } + + Vector3 dir 
= -li->transform.basis.get_axis(Vector3::AXIS_Z); + dir.y *= rb->sdfgi->y_mult; + dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = RS::LIGHT_DIRECTIONAL; + lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + + idx++; + } + + AABB cascade_aabb; + cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; + cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; + + for (uint32_t j = 0; j < render_state.sdfgi_update_data->positional_light_count; j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->positional_light_instances[j]); + ERR_CONTINUE(!li); + + uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); + if (i > max_sdfgi_cascade) { + continue; + } + + if (!cascade_aabb.intersects(li->aabb)) { + continue; + } + + Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); + //faster to not do this here + //dir.y *= rb->sdfgi->y_mult; + //dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Vector3 pos = li->transform.origin; + pos.y *= rb->sdfgi->y_mult; + lights[idx].position[0] = pos.x; + lights[idx].position[1] = pos.y; + lights[idx].position[2] = pos.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = storage->light_get_type(li->light); + 
lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); + lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); + lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); + lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + idx++; + } + + if (idx > 0) { + RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, RD::BARRIER_MASK_COMPUTE); + } + + rb->sdfgi->cascade_dynamic_light_count[i] = idx; + } + } +} + +void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { + RD::get_singleton()->draw_command_begin_label("GI Render"); + + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + Environment *env = environment_owner.getornull(p_environment); + + if (rb->ambient_buffer.is_null() || rb->using_half_size_gi != gi.half_resolution) { + if (rb->ambient_buffer.is_valid()) { + RD::get_singleton()->free(rb->ambient_buffer); + RD::get_singleton()->free(rb->reflection_buffer); + } + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = rb->width; + tf.height = rb->height; + if (gi.half_resolution) { + tf.width >>= 1; + tf.height >>= 1; + } + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->using_half_size_gi = gi.half_resolution; + + 
_render_buffers_uniform_set_changed(p_render_buffers); + } + + GI::PushConstant push_constant; + + push_constant.screen_size[0] = rb->width; + push_constant.screen_size[1] = rb->height; + push_constant.z_near = p_projection.get_z_near(); + push_constant.z_far = p_projection.get_z_far(); + push_constant.orthogonal = p_projection.is_orthogonal(); + push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); + push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); + push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; + push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; + push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); + push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; + + bool use_sdfgi = rb->sdfgi != nullptr; + bool use_giprobes = push_constant.max_giprobes > 0; + + if (env) { + push_constant.ao_color[0] = env->ao_color.r; + push_constant.ao_color[1] = env->ao_color.g; + push_constant.ao_color[2] = env->ao_color.b; + } else { + push_constant.ao_color[0] = 0; + push_constant.ao_color[1] = 0; + push_constant.ao_color[2] = 0; } + push_constant.cam_rotation[0] = p_transform.basis[0][0]; + push_constant.cam_rotation[1] = p_transform.basis[1][0]; + push_constant.cam_rotation[2] = p_transform.basis[2][0]; + push_constant.cam_rotation[3] = 0; + push_constant.cam_rotation[4] = p_transform.basis[0][1]; + push_constant.cam_rotation[5] = p_transform.basis[1][1]; + push_constant.cam_rotation[6] = p_transform.basis[2][1]; + push_constant.cam_rotation[7] = 0; + push_constant.cam_rotation[8] = p_transform.basis[0][2]; + push_constant.cam_rotation[9] = p_transform.basis[1][2]; + push_constant.cam_rotation[10] = p_transform.basis[2][2]; + push_constant.cam_rotation[11] = 0; + if (rb->gi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->gi_uniform_set)) { 
Vector<RD::Uniform> uniforms; { @@ -1806,17 +1858,19 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough } else { mode = (use_sdfgi && use_giprobes) ? GI::MODE_COMBINED : (use_sdfgi ? GI::MODE_SDFGI : GI::MODE_GIPROBE); } - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[mode]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GI::PushConstant)); if (rb->using_half_size_gi) { - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width >> 1, rb->height >> 1, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width >> 1, rb->height >> 1, 1); } else { - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); } - RD::get_singleton()->compute_list_end(); + //do barrier later to allow oeverlap + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //no barriers, let other compute, raster and transfer happen at the same time + RD::get_singleton()->draw_command_end_label(); } RID RendererSceneRenderRD::sky_create() { @@ -4015,11 +4069,7 @@ void RendererSceneRenderRD::light_instance_set_shadow_transform(RID p_light_inst LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); ERR_FAIL_COND(!light_instance); - if (storage->light_get_type(light_instance->light) != RS::LIGHT_DIRECTIONAL) { - p_pass = 0; - } - - ERR_FAIL_INDEX(p_pass, 4); + ERR_FAIL_INDEX(p_pass, 6); light_instance->shadow_transform[p_pass].camera = p_projection; light_instance->shadow_transform[p_pass].transform = p_transform; @@ 
-5150,9 +5200,6 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { RD::get_singleton()->free(rb->luminance.reduce[i]); } - for (int i = 0; i < rb->luminance.reduce.size(); i++) { - RD::get_singleton()->free(rb->luminance.reduce[i]); - } rb->luminance.reduce.clear(); if (rb->luminance.current.is_valid()) { @@ -5767,7 +5814,7 @@ void RendererSceneRenderRD::_sdfgi_debug_draw(RID p_render_buffers, const Camera RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DebugPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); RD::get_singleton()->compute_list_end(); Size2 rtsize = storage->render_target_get_size(rb->render_target); @@ -6179,7 +6226,7 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti } if (cluster.reflection_count) { - RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections); + RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } @@ -6571,16 +6618,17 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const r_positional_light_count++; } + //update without barriers if (cluster.omni_light_count) { - RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights); + RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } if (cluster.spot_light_count) { - RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) 
* cluster.spot_light_count, cluster.spot_lights); + RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } if (r_directional_light_count) { - RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights); + RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } @@ -6741,7 +6789,7 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const } if (cluster.decal_count > 0) { - RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals); + RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } @@ -6786,7 +6834,7 @@ void RendererSceneRenderRD::_allocate_shadow_shrink_stages(RID p_base, int p_bas s.size = base_texture_size; RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; + tf.format = RD::DATA_FORMAT_R16_UNORM; tf.width = base_texture_size; tf.height = base_texture_size; tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; @@ -6834,6 +6882,8 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e return; } + RENDER_TIMESTAMP(">Volumetric Fog"); + if (env && env->volumetric_fog_enabled && !rb->volumetric_fog) { //required volumetric fog but not existing, create rb->volumetric_fog = memnew(VolumetricFog); @@ -6870,7 +6920,11 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e //update directional shadow + RENDER_TIMESTAMP("Downsample Shadows"); + if (p_use_directional_shadows) { + 
RD::get_singleton()->draw_command_begin_label("Downsample Directional Shadows"); + if (directional_shadow.shrink_stages.is_empty()) { if (rb->volumetric_fog->uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { //invalidate uniform set, we will need a new one @@ -6897,6 +6951,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e } RD::get_singleton()->compute_list_end(); } + RD::get_singleton()->draw_command_end_label(); } ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); @@ -6904,6 +6959,8 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e if (shadow_atlas) { //shrink shadows that need to be shrunk + RD::get_singleton()->draw_command_begin_label("Downsample Positional Shadows"); + bool force_shrink_shadows = false; if (shadow_atlas->shrink_stages.is_empty()) { @@ -7022,8 +7079,10 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e } } - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); } + + RD::get_singleton()->draw_command_end_label(); } //update volumetric fog @@ -7273,10 +7332,10 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e push_constant.directional_shadow_pixel_size[1] = 1.0 / dssize.y; */ - RENDER_TIMESTAMP(">Volumetric Fog"); + RD::get_singleton()->draw_command_begin_label("Render Volumetric Fog"); RENDER_TIMESTAMP("Render Fog"); - RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params, RD::BARRIER_MASK_COMPUTE); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -7289,17 +7348,20 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e if (using_sdfgi) { 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); } - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); + RD::get_singleton()->draw_command_end_label(); RD::get_singleton()->compute_list_add_barrier(compute_list); if (use_filter) { + RD::get_singleton()->draw_command_begin_label("Filter Fog"); + RENDER_TIMESTAMP("Filter Fog"); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); RD::get_singleton()->compute_list_end(); //need restart for buffer update @@ -7313,95 +7375,174 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e if (using_sdfgi) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); } - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); } RENDER_TIMESTAMP("Integrate Fog"); + 
RD::get_singleton()->draw_command_begin_label("Integrate Fog"); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FOG]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER); RENDER_TIMESTAMP("<Volumetric Fog"); + RD::get_singleton()->draw_command_end_label(); } -void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) { - Color clear_color; - if (p_render_buffers.is_valid()) { - RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); - ERR_FAIL_COND(!rb); - clear_color = storage->render_target_get_clear_request_color(rb->render_target); - } else { - clear_color = storage->get_default_clear_color(); +uint32_t RendererSceneRenderRD::_get_render_state_directional_light_count() const { + return render_state.directional_light_count; +} + +bool RendererSceneRenderRD::_needs_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + if (rb->sdfgi != nullptr) { + return true; + } } 
+ return false; +} - //assign render indices to giprobes - for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { - GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probes[i]); - if (giprobe_inst) { - giprobe_inst->render_index = i; +void RendererSceneRenderRD::_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + _sdfgi_update_probes(render_state.render_buffers, render_state.environment); } } +} - const PagedArray<RID> *lights = &p_lights; - const PagedArray<RID> *reflections = &p_reflection_probes; - const PagedArray<RID> *gi_probes = &p_gi_probes; +void RendererSceneRenderRD::_pre_resolve_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + RD::get_singleton()->compute_list_end(); + } + } +} - PagedArray<RID> empty; +void RendererSceneRenderRD::_pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer) { + // Render shadows while GI is rendering, due to how barriers are handled, this should happen at the same time - if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { - lights = &empty; - reflections = &empty; - gi_probes = &empty; + if (render_state.render_buffers.is_valid() && p_use_gi) { + _sdfgi_store_probes(render_state.render_buffers); } - if (render_buffers_owner.owns(p_render_buffers)) { - RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); - current_cluster_builder = rb->cluster_builder; - } else if (reflection_probe_instance_owner.owns(p_reflection_probe)) { - ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflection_probe); - ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas); - if (!ra) { - ERR_PRINT("reflection probe has no reflection atlas! 
Bug?"); - current_cluster_builder = nullptr; - } else { - current_cluster_builder = ra->cluster_builder; + render_state.cube_shadows.clear(); + render_state.shadows.clear(); + render_state.directional_shadows.clear(); + + Plane camera_plane(render_state.cam_transform.origin, -render_state.cam_transform.basis.get_axis(Vector3::AXIS_Z)); + float lod_distance_multiplier = render_state.cam_projection.get_lod_multiplier(); + + { + for (int i = 0; i < render_state.render_shadow_count; i++) { + LightInstance *li = light_instance_owner.getornull(render_state.render_shadows[i].light); + + if (storage->light_get_type(li->light) == RS::LIGHT_DIRECTIONAL) { + render_state.directional_shadows.push_back(i); + } else if (storage->light_get_type(li->light) == RS::LIGHT_OMNI && storage->light_omni_get_shadow_mode(li->light) == RS::LIGHT_OMNI_SHADOW_CUBE) { + render_state.cube_shadows.push_back(i); + } else { + render_state.shadows.push_back(i); + } } - } else { - ERR_PRINT("No cluster builder, bug"); //should never happen, will crash - current_cluster_builder = nullptr; + + //cube shadows are rendered in their own way + for (uint32_t i = 0; i < render_state.cube_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.cube_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.cube_shadows[i]].pass, render_state.render_shadows[render_state.cube_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, true, true, true); + } + + if (render_state.directional_shadows.size()) { + //open the pass for directional shadows + _update_directional_shadow_atlas(); + RD::get_singleton()->draw_list_begin(directional_shadow.fb, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_end(); + } + } + + // Render GI + + bool render_shadows = render_state.directional_shadows.size() || render_state.shadows.size(); + bool 
render_gi = render_state.render_buffers.is_valid() && p_use_gi; + + if (render_shadows && render_gi) { + RENDER_TIMESTAMP("Render GI + Render Shadows (parallel)"); + } else if (render_shadows) { + RENDER_TIMESTAMP("Render Shadows"); + } else if (render_gi) { + RENDER_TIMESTAMP("Render GI"); } - current_cluster_builder->begin(p_cam_transform, p_cam_projection, !p_reflection_probe.is_valid()); + //prepare shadow rendering + if (render_shadows) { + _render_shadow_begin(); + + //render directional shadows + for (uint32_t i = 0; i < render_state.directional_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.directional_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.directional_shadows[i]].pass, render_state.render_shadows[render_state.directional_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, false, i == render_state.directional_shadows.size() - 1, false); + } + //render positional shadows + for (uint32_t i = 0; i < render_state.shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.shadows[i]].pass, render_state.render_shadows[render_state.shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, i == 0, i == render_state.shadows.size() - 1, true); + } + + _render_shadow_process(); + } + + //start GI + if (render_gi) { + _process_gi(render_state.render_buffers, p_normal_roughness_buffer, p_gi_probe_buffer, render_state.environment, render_state.cam_projection, render_state.cam_transform, *render_state.gi_probes); + } + + //Do shadow rendering (in parallel with GI) + if (render_shadows) { + _render_shadow_end(RD::BARRIER_MASK_NO_BARRIER); + } + + if (render_gi) { + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //use a later barrier + } + + if 
(render_state.render_buffers.is_valid()) { + if (p_use_ssao) { + _process_ssao(render_state.render_buffers, render_state.environment, p_normal_roughness_buffer, render_state.cam_projection); + } + } + + //full barrier here, we need raster, transfer and compute and it depends from the previous work + RD::get_singleton()->barrier(RD::BARRIER_MASK_ALL, RD::BARRIER_MASK_ALL); + + if (current_cluster_builder) { + current_cluster_builder->begin(render_state.cam_transform, render_state.cam_projection, !render_state.reflection_probe.is_valid()); + } bool using_shadows = true; - if (p_reflection_probe.is_valid()) { - if (!storage->reflection_probe_renders_shadows(reflection_probe_instance_get_probe(p_reflection_probe))) { + if (render_state.reflection_probe.is_valid()) { + if (!storage->reflection_probe_renders_shadows(reflection_probe_instance_get_probe(render_state.reflection_probe))) { using_shadows = false; } } else { //do not render reflections when rendering a reflection probe - _setup_reflections(*reflections, p_cam_transform.affine_inverse(), p_environment); + _setup_reflections(*render_state.reflection_probes, render_state.cam_transform.affine_inverse(), render_state.environment); } uint32_t directional_light_count = 0; uint32_t positional_light_count = 0; - _setup_lights(*lights, p_cam_transform, p_shadow_atlas, using_shadows, directional_light_count, positional_light_count); - _setup_decals(p_decals, p_cam_transform.affine_inverse()); + _setup_lights(*render_state.lights, render_state.cam_transform, render_state.shadow_atlas, using_shadows, directional_light_count, positional_light_count); + _setup_decals(*render_state.decals, render_state.cam_transform.affine_inverse()); - current_cluster_builder->bake_cluster(); + render_state.directional_light_count = directional_light_count; - uint32_t gi_probe_count = 0; - if (p_render_buffers.is_valid()) { - _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count); + if (current_cluster_builder) { + 
current_cluster_builder->bake_cluster(); } - if (p_render_buffers.is_valid()) { + if (render_state.render_buffers.is_valid()) { bool directional_shadows = false; for (uint32_t i = 0; i < directional_light_count; i++) { if (cluster.directional_lights[i].shadow_enabled) { @@ -7409,10 +7550,103 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & break; } } - _update_volumetric_fog(p_render_buffers, p_environment, p_cam_projection, p_cam_transform, p_shadow_atlas, directional_light_count, directional_shadows, positional_light_count, gi_probe_count); + _update_volumetric_fog(render_state.render_buffers, render_state.environment, render_state.cam_projection, render_state.cam_transform, render_state.shadow_atlas, directional_light_count, directional_shadows, positional_light_count, render_state.gi_probe_count); + } +} + +void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data) { + //assign render data + { + render_state.render_buffers = p_render_buffers; + render_state.cam_transform = p_cam_transform; + render_state.cam_projection = p_cam_projection; + render_state.cam_ortogonal = p_cam_projection.is_orthogonal(); + render_state.instances = &p_instances; + render_state.lights = &p_lights; + render_state.reflection_probes = &p_reflection_probes; + 
render_state.gi_probes = &p_gi_probes; + render_state.decals = &p_decals; + render_state.lightmaps = &p_lightmaps; + render_state.environment = p_environment; + render_state.camera_effects = p_camera_effects; + render_state.shadow_atlas = p_shadow_atlas; + render_state.reflection_atlas = p_reflection_atlas; + render_state.reflection_probe = p_reflection_probe; + render_state.reflection_probe_pass = p_reflection_probe_pass; + render_state.screen_lod_threshold = p_screen_lod_threshold; + + render_state.render_shadows = p_render_shadows; + render_state.render_shadow_count = p_render_shadow_count; + render_state.render_sdfgi_regions = p_render_sdfgi_regions; + render_state.render_sdfgi_region_count = p_render_sdfgi_region_count; + render_state.sdfgi_update_data = p_sdfgi_update_data; + } + + PagedArray<RID> empty; + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { + render_state.lights = &empty; + render_state.reflection_probes = &empty; + render_state.gi_probes = &empty; + } + + //sdfgi first + if (p_render_buffers.is_valid()) { + for (int i = 0; i < render_state.render_sdfgi_region_count; i++) { + _render_sdfgi_region(p_render_buffers, render_state.render_sdfgi_regions[i].region, render_state.render_sdfgi_regions[i].instances); + } + if (render_state.sdfgi_update_data->update_static) { + _render_sdfgi_static_lights(p_render_buffers, render_state.sdfgi_update_data->static_cascade_count, p_sdfgi_update_data->static_cascade_indices, render_state.sdfgi_update_data->static_positional_lights); + } + } + + Color clear_color; + if (p_render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + clear_color = storage->render_target_get_clear_request_color(rb->render_target); + } else { + clear_color = storage->get_default_clear_color(); + } + + //assign render indices to giprobes + for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { + GIProbeInstance *giprobe_inst = 
gi_probe_instance_owner.getornull(p_gi_probes[i]); + if (giprobe_inst) { + giprobe_inst->render_index = i; + } + } + + if (render_buffers_owner.owns(render_state.render_buffers)) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + current_cluster_builder = rb->cluster_builder; + } else if (reflection_probe_instance_owner.owns(render_state.reflection_probe)) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(render_state.reflection_probe); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas); + if (!ra) { + ERR_PRINT("reflection probe has no reflection atlas! Bug?"); + current_cluster_builder = nullptr; + } else { + current_cluster_builder = ra->cluster_builder; + } + } else { + ERR_PRINT("No cluster builder, bug"); //should never happen, will crash + current_cluster_builder = nullptr; + } + + if (p_render_buffers.is_valid()) { + _pre_process_gi(p_render_buffers, p_cam_transform); + } + + render_state.gi_probe_count = 0; + if (render_state.render_buffers.is_valid()) { + _setup_giprobes(render_state.render_buffers, render_state.cam_transform, *render_state.gi_probes, render_state.gi_probe_count); + _sdfgi_update_light(render_state.render_buffers, render_state.environment); } - _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); + render_state.depth_prepass_used = false; + //calls _pre_opaque_render between depth pre-pass and opaque pass + _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, *render_state.gi_probes, p_lightmaps, p_environment, 
current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); if (p_render_buffers.is_valid()) { if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES) { @@ -7446,7 +7680,7 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & } } -void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { +void RendererSceneRenderRD::_render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_open_pass, bool p_close_pass, bool p_clear_region) { LightInstance *light_instance = light_instance_owner.getornull(p_light); ERR_FAIL_COND(!light_instance); @@ -7469,13 +7703,7 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p CameraMatrix light_projection; Transform light_transform; - bool clear_region = true; - bool begin_texture = true; - bool end_texture = true; - if (storage->light_get_type(light_instance->light) == RS::LIGHT_DIRECTIONAL) { - _update_directional_shadow_atlas(); - //set pssm stuff if (light_instance->last_scene_shadow_pass != scene_pass) { light_instance->directional_rect = _get_directional_shadow_rect(directional_shadow.size, directional_shadow.light_count, directional_shadow.current_light); @@ -7492,7 +7720,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID 
p_shadow_atlas, int p atlas_rect.size.width = light_instance->directional_rect.size.x; atlas_rect.size.height = light_instance->directional_rect.size.y; - int pass_count = 1; if (storage->light_directional_get_shadow_mode(light_instance->light) == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_4_SPLITS) { atlas_rect.size.width /= 2; atlas_rect.size.height /= 2; @@ -7505,7 +7732,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p atlas_rect.position.x += atlas_rect.size.width; atlas_rect.position.y += atlas_rect.size.height; } - pass_count = 4; } else if (storage->light_directional_get_shadow_mode(light_instance->light) == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS) { atlas_rect.size.height /= 2; @@ -7513,7 +7739,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p } else { atlas_rect.position.y += atlas_rect.size.height; } - pass_count = 2; } light_instance->shadow_transform[p_pass].atlas_rect = atlas_rect; @@ -7527,10 +7752,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p render_texture = RID(); flip_y = true; - clear_region = false; - begin_texture = (directional_shadow.current_light == 1) && (p_pass == 0); //light is 1-index because it was incremented above - end_texture = (directional_shadow.current_light == directional_shadow.light_count) && (p_pass == pass_count - 1); - } else { //set from shadow atlas @@ -7568,14 +7789,17 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p render_fb = cubemap->side_fb[p_pass]; render_texture = cubemap->cubemap; - light_projection = light_instance->shadow_transform[0].camera; - light_transform = light_instance->shadow_transform[0].transform; + light_projection = light_instance->shadow_transform[p_pass].camera; + light_transform = light_instance->shadow_transform[p_pass].transform; render_cubemap = true; finalize_cubemap = p_pass == 5; atlas_fb = shadow_atlas->fb; atlas_size = shadow_atlas->size; - 
clear_region = false; + + if (p_pass == 0) { + _render_shadow_begin(); + } } else { light_projection = light_instance->shadow_transform[0].camera; @@ -7602,8 +7826,10 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p if (render_cubemap) { //rendering to cubemap - _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, Rect2(), false, true, true, true); if (finalize_cubemap) { + _render_shadow_process(); + _render_shadow_end(); //reblit Rect2 atlas_rect_norm = atlas_rect; atlas_rect_norm.position.x /= float(atlas_size); @@ -7614,10 +7840,14 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), false); atlas_rect_norm.position.y += atlas_rect_norm.size.height; storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), true); + + //restore transform so it can be properly used + light_instance_set_shadow_transform(p_light, CameraMatrix(), light_instance->transform, zfar, 0, 0, 0); } + } else { //render shadow - _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, atlas_rect, flip_y, clear_region, begin_texture, end_texture); + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, 
p_lod_distance_multiplier, p_screen_lod_threshold, atlas_rect, flip_y, p_clear_region, p_open_pass, p_close_pass); } } @@ -7625,7 +7855,7 @@ void RendererSceneRenderRD::render_material(const Transform &p_cam_transform, co _render_material(p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, p_framebuffer, p_region); } -void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) { +void RendererSceneRenderRD::_render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) { //print_line("rendering region " + itos(p_region)); RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); @@ -7651,6 +7881,8 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con _render_sdfgi(p_render_buffers, from, size, bounds, p_instances, rb->sdfgi->render_albedo, rb->sdfgi->render_emission, rb->sdfgi->render_emission_aniso, rb->sdfgi->render_geom_facing); if (cascade_next != cascade) { + RD::get_singleton()->draw_command_begin_label("SDFGI Pre-Process Cascade"); + RENDER_TIMESTAMP(">SDFGI Update SDF"); //done rendering! 
must update SDF //clear dispatch indirect data @@ -7700,7 +7932,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con groups.z = rb->sdfgi->cascade_size - ABS(dirty.z); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z); //no barrier, continue together @@ -7742,7 +7974,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7750,7 +7982,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * 
rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7766,7 +7998,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); } } @@ -7790,7 +8022,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_half_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); 
RD::get_singleton()->compute_list_add_barrier(compute_list); //must start with regular jumpflood @@ -7810,7 +8042,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; @@ -7828,7 +8060,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 
1 : 0; } @@ -7840,7 +8072,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_upscale_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); //run one pass of fullsize jumpflood to fix up half size arctifacts @@ -7850,7 +8082,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[rb->sdfgi->upscale_jfa_uniform_set_index], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); } else { @@ -7860,7 +8092,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con 
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7877,7 +8109,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 
1 : 0; @@ -7895,7 +8127,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; } @@ -7942,7 +8174,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_STORE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].sdf_store_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_end(); @@ -7979,6 +8211,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con #endif RENDER_TIMESTAMP("<SDFGI Update SDF"); + RD::get_singleton()->draw_command_end_label(); } } @@ -7999,11 +8232,13 @@ void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, 
_render_particle_collider_heightfield(fb, cam_xform, cm, p_instances); } -void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { +void RendererSceneRenderRD::_render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); ERR_FAIL_COND(!rb->sdfgi); + RD::get_singleton()->draw_command_begin_label("SDFGI Render Static Lighs"); + _sdfgi_update_cascades(p_render_buffers); //need cascades updated for this SDGIShader::Light lights[SDFGI::MAX_STATIC_LIGHTS]; @@ -8112,6 +8347,8 @@ void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uin } RD::get_singleton()->compute_list_end(); + + RD::get_singleton()->draw_command_end_label(); } bool RendererSceneRenderRD::free(RID p_rid) { @@ -8797,7 +9034,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { cluster.max_reflections = max_cluster_elements; cluster.reflections = memnew_arr(Cluster::ReflectionData, cluster.max_reflections); - cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_decals); + cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_reflections); cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(sizeof(Cluster::ReflectionData) * cluster.max_reflections); } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index af8cdb9b71..707c858ed9 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -109,8 +109,13 @@ protected: void _setup_reflections(const PagedArray<RID> 
&p_reflections, const Transform &p_camera_inverse_transform, RID p_environment); void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used); - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; - virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0; + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; + + virtual void _render_shadow_begin() = 0; + virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> 
&p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0; + virtual void _render_shadow_process() = 0; + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL) = 0; + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; @@ -132,8 +137,16 @@ protected: void _setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size); void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); + void _pre_process_gi(RID p_render_buffers, const Transform &p_transform); void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes); + bool _needs_post_prepass_render(bool p_use_gi); + void 
_post_prepass_render(bool p_use_gi); + void _pre_resolve_render(bool p_use_gi); + + void _pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer); + uint32_t _get_render_state_directional_light_count() const; + // needed for a single argument calls (material and uv2) PagedArrayPool<GeometryInstance *> cull_argument_pool; PagedArray<GeometryInstance *> cull_argument; //need this to exist @@ -651,7 +664,7 @@ private: RS::LightType light_type = RS::LIGHT_DIRECTIONAL; - ShadowTransform shadow_transform[4]; + ShadowTransform shadow_transform[6]; AABB aabb; RID self; @@ -1031,8 +1044,14 @@ private: float y_mult = 1.0; uint32_t render_pass = 0; + + int32_t cascade_dynamic_light_count[SDFGI::MAX_CASCADES]; //used dynamically }; + void _sdfgi_update_light(RID p_render_buffers, RID p_environment); + void _sdfgi_update_probes(RID p_render_buffers, RID p_environment); + void _sdfgi_store_probes(RID p_render_buffers); + RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_10_FRAMES; RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES; @@ -1460,6 +1479,41 @@ private: } cluster; + struct RenderState { + RID render_buffers; + Transform cam_transform; + CameraMatrix cam_projection; + bool cam_ortogonal = false; + const PagedArray<GeometryInstance *> *instances = nullptr; + const PagedArray<RID> *lights = nullptr; + const PagedArray<RID> *reflection_probes = nullptr; + const PagedArray<RID> *gi_probes = nullptr; + const PagedArray<RID> *decals = nullptr; + const PagedArray<RID> *lightmaps = nullptr; + RID environment; + RID camera_effects; + RID shadow_atlas; + RID reflection_atlas; + RID reflection_probe; + int reflection_probe_pass = 0; + float screen_lod_threshold = 0.0; + + const RenderShadowData *render_shadows = nullptr; + int render_shadow_count = 0; + const 
RenderSDFGIData *render_sdfgi_regions = nullptr; + int render_sdfgi_region_count = 0; + const RenderSDFGIUpdateData *sdfgi_update_data = nullptr; + + uint32_t directional_light_count = 0; + uint32_t gi_probe_count = 0; + + LocalVector<int> cube_shadows; + LocalVector<int> shadows; + LocalVector<int> directional_shadows; + + bool depth_prepass_used; + } render_state; + struct VolumetricFog { uint32_t width = 0; uint32_t height = 0; @@ -1547,6 +1601,10 @@ private: uint32_t max_cluster_elements = 512; bool low_end = false; + void _render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0, bool p_open_pass = true, bool p_close_pass = true, bool p_clear_region = true); + void _render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances); + void _render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result); + public: virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance) = 0; virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance) = 0; @@ -1594,7 +1652,6 @@ public: virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const; virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const; virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const; - virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count); RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; } /* SKY API */ @@ -1997,15 +2054,10 @@ public: float render_buffers_get_volumetric_fog_end(RID p_render_buffers); float render_buffers_get_volumetric_fog_detail_spread(RID 
p_render_buffers); - void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); - - void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0); + void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr); void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); - void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances); - void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> 
*p_positional_light_cull_result); - void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances); virtual void set_scene_pass(uint64_t p_pass) { diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index 6d4343e183..f9c22dd4bf 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -3098,7 +3098,7 @@ void RendererStorageRD::update_mesh_instances() { RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant)); //dispatch without barrier, so all is done at the same time - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1); } mi->dirty = false; @@ -4555,7 +4555,7 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -4609,7 +4609,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 & RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); 
RD::get_singleton()->compute_list_end(); @@ -4621,7 +4621,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 & RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -4728,7 +4728,7 @@ void RendererStorageRD::update_particles() { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -6980,7 +6980,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[1], 0); //fill [0] RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); /* Process */ @@ -6996,7 +6996,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 
1 : 0], 0); push_constant.stride = stride; RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); stride /= 2; swap = !swap; RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7007,7 +7007,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[shrink ? RenderTargetSDF::SHADER_STORE_SHRINK : RenderTargetSDF::SHADER_STORE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); RD::get_singleton()->compute_list_end(); } diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h index aa7195232a..48d43568c4 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.h +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ -1482,13 +1482,7 @@ public: return s->lod_count > 0; } - _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface) const { - Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); - - return s->index_array; - } - - _FORCE_INLINE_ RID mesh_surface_get_index_array_with_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { + _FORCE_INLINE_ uint32_t mesh_surface_get_lod(void 
*p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); int32_t current_lod = -1; @@ -1500,9 +1494,19 @@ public: current_lod = i; } if (current_lod == -1) { + return 0; + } else { + return current_lod + 1; + } + } + + _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface, uint32_t p_lod) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + if (p_lod == 0) { return s->index_array; } else { - return s->lods[current_lod].index_array; + return s->lods[p_lod - 1].index_array; } } diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index 2ae22a8a38..e4a39ff813 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -301,6 +301,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { builder.append(compute_codev.get_data()); // version info (if exists) builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); builder.append(general_defines.get_data()); builder.append(variant_defines[p_variant].get_data()); @@ -401,7 +402,6 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio builder.append(fragment_codev.get_data()); // version info (if exists) builder.append("\n"); //make sure defines begin at newline - builder.append(general_defines.get_data()); builder.append(variant_defines[i].get_data()); for (int j = 0; j < version->custom_defines.size(); j++) { @@ -440,6 +440,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio builder.append(compute_codev.get_data()); // version info (if exists) builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); builder.append(general_defines.get_data()); builder.append(variant_defines[i].get_data()); @@ -596,6 
+597,22 @@ bool ShaderRD::is_variant_enabled(int p_variant) const { return variants_enabled[p_variant]; } +ShaderRD::ShaderRD() { + // Do not feel forced to use this, in most cases it makes little to no difference. + bool use_32_threads = false; + if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") { + use_32_threads = true; + } + String base_compute_define_text; + if (use_32_threads) { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 32\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 4\n"; + } else { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 64\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 8\n"; + } + + base_compute_defines = base_compute_define_text.ascii(); +} + void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines) { ERR_FAIL_COND(variant_defines.size()); ERR_FAIL_COND(p_variant_defines.size() == 0); diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index a3474c6f93..e0f4dcf2d0 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -99,8 +99,10 @@ class ShaderRD { const char *name; + CharString base_compute_defines; + protected: - ShaderRD() {} + ShaderRD(); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); public: diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index ea203c8abe..83e2ae9cbd 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -89,12 +89,6 @@ MATERIAL_UNIFORMS } material; #endif -/* clang-format off */ - -VERTEX_SHADER_GLOBALS - -/* clang-format on */ - invariant gl_Position; #ifdef MODE_DUAL_PARABOLOID @@ -103,28 +97,43 @@ layout(location = 8) out float dp_clip; #endif +layout(location = 9) 
out flat uint instance_index; + +/* clang-format off */ + +VERTEX_SHADER_GLOBALS + +/* clang-format on */ + void main() { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif - mat4 world_matrix = draw_call.transform; + instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + + mat4 world_matrix = instances.data[instance_index].transform; mat3 world_normal_matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { world_normal_matrix = inverse(mat3(world_matrix)); } else { world_normal_matrix = mat3(world_matrix); } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH)) { + if (is_multimesh) { //multimesh, instances are for it - uint offset = (draw_call.flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; + uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; offset *= gl_InstanceIndex; mat4 matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); offset += 2; } else { @@ -132,14 +141,14 @@ void main() { offset += 3; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { #ifdef COLOR_USED color_interp *= transforms.data[offset]; #endif offset += 1; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { instance_custom = 
transforms.data[offset]; } @@ -161,7 +170,7 @@ void main() { #endif #if 0 - if (bool(draw_call.flags & INSTANCE_FLAGS_SKELETON)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) { //multimesh, instances are for it uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3; @@ -304,7 +313,8 @@ VERTEX_SHADER_CODE #endif #ifdef MODE_RENDER_MATERIAL if (scene_data.material_uv2_mode) { - gl_Position.xy = (uv2_attrib.xy + draw_call.lightmap_uv_scale.xy) * 2.0 - 1.0; + vec2 uv_offset = unpackHalf2x16(draw_call.uv_offset); + gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0; gl_Position.z = 0.00001; gl_Position.w = 1.0; } @@ -350,9 +360,11 @@ layout(location = 8) in float dp_clip; #endif +layout(location = 9) in flat uint instance_index; + //defines to keep compatibility with vertex -#define world_matrix draw_call.transform +#define world_matrix instances.data[instance_index].transform #define projection_matrix scene_data.projection_matrix #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) @@ -1770,7 +1782,7 @@ vec4 fog_process(vec3 vertex) { } } - float fog_amount = 1.0 - exp(vertex.z * scene_data.fog_density); + float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); if (abs(scene_data.fog_height_density) > 0.001) { float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; @@ -2083,7 +2095,7 @@ FRAGMENT_SHADER_CODE #endif uint decal_index = 32 * i + bit; - if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { + if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2210,8 +2222,8 @@ FRAGMENT_SHADER_CODE #ifdef USE_LIGHTMAP //lightmap - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture - uint index = draw_call.gi_offset; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture + uint index = 
instances.data[instance_index].gi_offset; vec3 wnormal = mat3(scene_data.camera_matrix) * normal; const float c1 = 0.429043; @@ -2230,12 +2242,12 @@ FRAGMENT_SHADER_CODE 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); - } else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap - bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); - uint ofs = draw_call.gi_offset & 0xFFFF; + } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap + bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); + uint ofs = instances.data[instance_index].gi_offset & 0xFFFF; vec3 uvw; - uvw.xy = uv2 * draw_call.lightmap_uv_scale.zw + draw_call.lightmap_uv_scale.xy; - uvw.z = float((draw_call.gi_offset >> 16) & 0xFFFF); + uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy; + uvw.z = float((instances.data[instance_index].gi_offset >> 16) & 0xFFFF); if (uses_sh) { uvw.z *= 4.0; //SH textures use 4 times more data @@ -2244,7 +2256,7 @@ FRAGMENT_SHADER_CODE vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; - uint idx = draw_call.gi_offset >> 20; + uint idx = instances.data[instance_index].gi_offset >> 20; vec3 n = normalize(lightmaps.data[idx].normal_xform * normal); ambient_light += lm_light_l0 * 0.282095f; @@ -2264,7 +2276,7 @@ FRAGMENT_SHADER_CODE } #elif defined(USE_FORWARD_GI) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture 
//make vertex orientation the world one, but still align to camera vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; @@ -2336,9 +2348,9 @@ FRAGMENT_SHADER_CODE } } - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); //find arbitrary tangent and bitangent, then build a matrix vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); @@ -2350,7 +2362,7 @@ FRAGMENT_SHADER_CODE vec4 spec_accum = vec4(0.0); gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); - uint index2 = draw_call.gi_offset >> 16; + uint index2 = instances.data[instance_index].gi_offset >> 16; if (index2 != 0xFFFF) { gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); @@ -2369,7 +2381,7 @@ FRAGMENT_SHADER_CODE } #elif !defined(LOW_END_MODE) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers vec2 coord; @@ -2448,7 +2460,7 @@ FRAGMENT_SHADER_CODE #endif uint reflection_index = 32 * i + bit; - if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) { + if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2519,7 +2531,7 @@ FRAGMENT_SHADER_CODE break; } - if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2838,7 +2850,7 @@ FRAGMENT_SHADER_CODE break; } - 
if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2968,7 +2980,7 @@ FRAGMENT_SHADER_CODE #endif uint light_index = 32 * i + bit; - if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3041,7 +3053,7 @@ FRAGMENT_SHADER_CODE uint light_index = 32 * i + bit; - if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(spot_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3214,9 +3226,9 @@ FRAGMENT_SHADER_CODE normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); #ifdef MODE_RENDER_GIPROBE - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; - uint index2 = draw_call.gi_offset >> 16; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + uint index2 = instances.data[instance_index].gi_offset >> 16; giprobe_buffer.x = index1 & 0xFF; giprobe_buffer.y = index2 & 0xFF; } else { @@ -3275,6 +3287,7 @@ FRAGMENT_SHADER_CODE // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. 
frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + ; #endif //MODE_MULTIPLE_RENDER_TARGETS diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index e9b79e1560..d78890fa9e 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -21,12 +21,10 @@ #endif layout(push_constant, binding = 0, std430) uniform DrawCall { - mat4 transform; - uint flags; - uint instance_uniforms_ofs; //base offset in global buffer for instance variables - uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint layer_mask; - vec4 lightmap_uv_scale; + uint instance_index; + uint uv_offset; + uint pad0; + uint pad1; } draw_call; @@ -45,96 +43,13 @@ draw_call; #define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 #define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 -layout(set = 0, binding = 1) uniform sampler material_samplers[12]; - -layout(set = 0, binding = 2) uniform sampler shadow_sampler; - #define SDFGI_MAX_CASCADES 8 -layout(set = 0, binding = 3, std140) uniform SceneData { - mat4 projection_matrix; - mat4 inv_projection_matrix; - - mat4 camera_matrix; - mat4 inv_camera_matrix; - - vec2 viewport_size; - vec2 screen_pixel_size; - - uint cluster_shift; - uint cluster_width; - uint cluster_type_size; - uint max_cluster_element_count_div_32; - - //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted - vec4 directional_penumbra_shadow_kernel[32]; - vec4 directional_soft_shadow_kernel[32]; - vec4 penumbra_shadow_kernel[32]; - vec4 soft_shadow_kernel[32]; - - uint directional_penumbra_shadow_samples; - uint directional_soft_shadow_samples; - uint penumbra_shadow_samples; - uint soft_shadow_samples; - - vec4 ambient_light_color_energy; +/* Set 1: Base Pass (never changes) */ - float ambient_color_sky_mix; - bool use_ambient_light; - bool use_ambient_cubemap; - 
bool use_reflection_cubemap; - - mat3 radiance_inverse_xform; - - vec2 shadow_atlas_pixel_size; - vec2 directional_shadow_pixel_size; - - uint directional_light_count; - float dual_paraboloid_side; - float z_far; - float z_near; - - bool ssao_enabled; - float ssao_light_affect; - float ssao_ao_affect; - bool roughness_limiter_enabled; - - float roughness_limiter_amount; - float roughness_limiter_limit; - uvec2 roughness_limiter_pad; - - vec4 ao_color; - - mat4 sdf_to_bounds; - - ivec3 sdf_offset; - bool material_uv2_mode; - - ivec3 sdf_size; - bool gi_upscale_for_msaa; - - bool volumetric_fog_enabled; - float volumetric_fog_inv_length; - float volumetric_fog_detail_spread; - uint volumetric_fog_pad; - - bool fog_enabled; - float fog_density; - float fog_height; - float fog_height_density; - - vec3 fog_light_color; - float fog_sun_scatter; - - float fog_aerial_perspective; - - float time; - float reflection_multiplier; // one normally, zero when rendering reflections - - bool pancake_shadows; -} +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; -scene_data; +layout(set = 0, binding = 2) uniform sampler shadow_sampler; #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) #define INSTANCE_FLAGS_USE_SDFGI (1 << 7) @@ -153,22 +68,22 @@ scene_data; #define INSTANCE_FLAGS_SKELETON (1 << 19) #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) -layout(set = 0, binding = 4, std430) restrict readonly buffer OmniLights { +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { LightData data[]; } omni_lights; -layout(set = 0, binding = 5, std430) restrict readonly buffer SpotLights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { LightData data[]; } spot_lights; -layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { +layout(set = 0, binding = 5) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -layout(set = 0, binding = 7, std140) uniform DirectionalLights { 
+layout(set = 0, binding = 6, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; @@ -180,7 +95,7 @@ struct Lightmap { mat3 normal_xform; }; -layout(set = 0, binding = 8, std140) restrict readonly buffer Lightmaps { +layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps { Lightmap data[]; } lightmaps; @@ -189,20 +104,20 @@ struct LightmapCapture { vec4 sh[9]; }; -layout(set = 0, binding = 9, std140) restrict readonly buffer LightmapCaptures { +layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { LightmapCapture data[]; } lightmap_captures; -layout(set = 0, binding = 10) uniform texture2D decal_atlas; -layout(set = 0, binding = 11) uniform texture2D decal_atlas_srgb; +layout(set = 0, binding = 9) uniform texture2D decal_atlas; +layout(set = 0, binding = 10) uniform texture2D decal_atlas_srgb; -layout(set = 0, binding = 12, std430) restrict readonly buffer Decals { +layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { DecalData data[]; } decals; -layout(set = 0, binding = 13, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableData { vec4 data[]; } global_variables; @@ -216,7 +131,7 @@ struct SDFGIProbeCascadeData { float to_cell; // 1/bounds * grid_size }; -layout(set = 0, binding = 14, std140) uniform SDFGI { +layout(set = 0, binding = 13, std140) uniform SDFGI { vec3 grid_size; uint max_cascades; @@ -246,47 +161,140 @@ sdfgi; #endif //LOW_END_MODE -// decal atlas +/* Set 2: Render Pass (changes per render pass) */ -/* Set 1, Radiance */ +layout(set = 1, binding = 0, std140) uniform SceneData { + mat4 projection_matrix; + mat4 inv_projection_matrix; + + mat4 camera_matrix; + mat4 inv_camera_matrix; + + vec2 viewport_size; + vec2 screen_pixel_size; + + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint 
max_cluster_element_count_div_32; + + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted + vec4 directional_penumbra_shadow_kernel[32]; + vec4 directional_soft_shadow_kernel[32]; + vec4 penumbra_shadow_kernel[32]; + vec4 soft_shadow_kernel[32]; + + uint directional_penumbra_shadow_samples; + uint directional_soft_shadow_samples; + uint penumbra_shadow_samples; + uint soft_shadow_samples; + + vec4 ambient_light_color_energy; + + float ambient_color_sky_mix; + bool use_ambient_light; + bool use_ambient_cubemap; + bool use_reflection_cubemap; + + mat3 radiance_inverse_xform; + + vec2 shadow_atlas_pixel_size; + vec2 directional_shadow_pixel_size; + + uint directional_light_count; + float dual_paraboloid_side; + float z_far; + float z_near; + + bool ssao_enabled; + float ssao_light_affect; + float ssao_ao_affect; + bool roughness_limiter_enabled; + + float roughness_limiter_amount; + float roughness_limiter_limit; + uvec2 roughness_limiter_pad; + + vec4 ao_color; + + mat4 sdf_to_bounds; + + ivec3 sdf_offset; + bool material_uv2_mode; + + ivec3 sdf_size; + bool gi_upscale_for_msaa; + + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; + + bool fog_enabled; + float fog_density; + float fog_height; + float fog_height_density; + + vec3 fog_light_color; + float fog_sun_scatter; + + float fog_aerial_perspective; + + float time; + float reflection_multiplier; // one normally, zero when rendering reflections + + bool pancake_shadows; +} + +scene_data; + +struct InstanceData { + mat4 transform; + uint flags; + uint instance_uniforms_ofs; //base offset in global buffer for instance variables + uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint layer_mask; + vec4 lightmap_uv_scale; +}; + +layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer { + InstanceData data[]; +} +instances; #ifdef USE_RADIANCE_CUBEMAP_ARRAY 
-layout(set = 1, binding = 0) uniform textureCubeArray radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap; #else -layout(set = 1, binding = 0) uniform textureCube radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCube radiance_cubemap; #endif -/* Set 2, Reflection and Shadow Atlases (view dependent) */ - -layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas; +layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas; -layout(set = 1, binding = 2) uniform texture2D shadow_atlas; +layout(set = 1, binding = 4) uniform texture2D shadow_atlas; -layout(set = 1, binding = 3) uniform texture2D directional_shadow_atlas; +layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas; -layout(set = 1, binding = 4) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; +layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; -#ifndef LOW_END_MODE -layout(set = 1, binding = 5) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +#ifndef LOW_END_MOD +layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; #endif -layout(set = 1, binding = 6, std430) buffer restrict readonly ClusterBuffer { +layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer { uint data[]; } cluster_buffer; -/* Set 3, Render Buffers */ - #ifdef MODE_RENDER_SDF -layout(r16ui, set = 1, binding = 7) uniform restrict writeonly uimage3D albedo_volume_grid; -layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_grid; -layout(r32ui, set = 1, binding = 9) uniform restrict writeonly uimage3D emission_aniso_grid; -layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; +layout(r16ui, set = 1, binding = 9) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 10) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 11) 
uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; //still need to be present for shaders that use it, so remap them to something #define depth_buffer shadow_atlas @@ -295,17 +303,17 @@ layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; #else -layout(set = 1, binding = 7) uniform texture2D depth_buffer; -layout(set = 1, binding = 8) uniform texture2D color_buffer; +layout(set = 1, binding = 9) uniform texture2D depth_buffer; +layout(set = 1, binding = 10) uniform texture2D color_buffer; #ifndef LOW_END_MODE -layout(set = 1, binding = 9) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 10) uniform texture2D ao_buffer; -layout(set = 1, binding = 11) uniform texture2D ambient_buffer; -layout(set = 1, binding = 12) uniform texture2D reflection_buffer; -layout(set = 1, binding = 13) uniform texture2DArray sdfgi_lightprobe_texture; -layout(set = 1, binding = 14) uniform texture3D sdfgi_occlusion_cascades; +layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 12) uniform texture2D ao_buffer; +layout(set = 1, binding = 13) uniform texture2D ambient_buffer; +layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { mat4 xform; @@ -323,22 +331,22 @@ struct GIProbeData { uint mipmaps; }; -layout(set = 1, binding = 15, std140) uniform GIProbes { +layout(set = 1, binding = 17, std140) uniform GIProbes { GIProbeData data[MAX_GI_PROBES]; } gi_probes; -layout(set = 1, binding = 16) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture; #endif // LOW_END_MODE #endif -/* Set 4 Skeleton & Instancing (Multimesh) */ +/* Set 2 Skeleton & Instancing (can change per 
item) */ layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { vec4 data[]; } transforms; -/* Set 5 User Material */ +/* Set 3 User Material */ diff --git a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl index 29443ae7db..a29b24e560 100644 --- a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl +++ b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl @@ -6,8 +6,20 @@ VERSION_DEFINES #define BLOCK_SIZE 8 +#ifdef MODE_REDUCE_SUBGROUP + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +//nvidia friendly, max 32 +layout(local_size_x = 8, local_size_y = 4, local_size_z = 1) in; + +#else + layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; +#endif + #ifdef MODE_REDUCE shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; @@ -16,8 +28,12 @@ const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3); #endif -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_depth; -layout(r32f, set = 0, binding = 1) uniform restrict writeonly image2D dst_depth; +#if defined(MODE_REDUCE) || defined(MODE_REDUCE_SUBGROUP) +layout(set = 0, binding = 0) uniform sampler2D source_depth; +#else +layout(r16, set = 0, binding = 0) uniform restrict readonly image2D source_depth; +#endif +layout(r16, set = 1, binding = 0) uniform restrict writeonly image2D dst_depth; layout(push_constant, binding = 1, std430) uniform Params { ivec2 source_size; @@ -29,6 +45,48 @@ layout(push_constant, binding = 1, std430) uniform Params { params; void main() { +#ifdef MODE_REDUCE_SUBGROUP + + uvec2 local_pos = gl_LocalInvocationID.xy; + ivec2 image_offset = params.source_offset; + ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy * ivec2(1, 2)); + + float depth = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; + depth += texelFetch(source_depth, 
min(image_pos + ivec2(0, 1), params.source_size - ivec2(1)), 0).r; + depth *= 0.5; + +#ifdef MODE_REDUCE_8 + //fast version, reduce all + float depth_average = subgroupAdd(depth) / 32.0; + if (local_pos == uvec2(0)) { + imageStore(dst_depth, image_pos / 8, vec4(depth_average)); + } +#else + //bit slower version, reduce by regions + uint group_size = (8 / params.min_size); + uvec2 group_id = local_pos / (8 / params.min_size); + + uvec4 mask; + float depth_average = 0; + + while (true) { + uvec2 first = subgroupBroadcastFirst(group_id); + mask = subgroupBallot(first == group_id); + if (first == group_id) { + depth_average = subgroupAdd(depth); + break; + } + } + + depth_average /= float(group_size * group_size); + + if (local_pos == group_id) { + imageStore(dst_depth, image_pos / int(group_size), vec4(depth_average)); + } +#endif + +#endif + #ifdef MODE_REDUCE uvec2 pos = gl_LocalInvocationID.xy; @@ -36,7 +94,7 @@ void main() { ivec2 image_offset = params.source_offset; ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy); uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x]; - tmp_data[dst_t] = imageLoad(source_depth, min(image_pos, params.source_size - ivec2(1))).r; + tmp_data[dst_t] = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; ivec2 image_size = params.source_size; uint t = pos.y * BLOCK_SIZE + pos.x; diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp index db601ba49c..8067f9574c 100644 --- a/servers/rendering/renderer_scene_cull.cpp +++ b/servers/rendering/renderer_scene_cull.cpp @@ -1906,6 +1906,9 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons RS::LightOmniShadowMode shadow_mode = RSG::storage->light_omni_get_shadow_mode(p_instance->base); if (shadow_mode == RS::LIGHT_OMNI_SHADOW_DUAL_PARABOLOID || !scene_render->light_instances_can_render_shadow_cube()) { + if (max_shadows_used + 2 > MAX_UPDATE_SHADOWS) { + return 
true; + } for (int i = 0; i < 2; i++) { //using this one ensures that raster deferred will have it RENDER_TIMESTAMP("Culling Shadow Paraboloid" + itos(i)); @@ -1922,7 +1925,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons planes.write[4] = light_transform.xform(Plane(Vector3(0, -1, z).normalized(), radius)); planes.write[5] = light_transform.xform(Plane(Vector3(0, 0, -z), 0)); - geometry_instances_to_shadow_render.clear(); instance_shadow_cull_result.clear(); Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size()); @@ -1943,6 +1945,8 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons Plane near_plane(light_transform.origin, light_transform.basis.get_axis(2) * z); + RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; + for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows) { @@ -1957,16 +1961,21 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons } } - geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); + shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); } RSG::storage->update_mesh_instances(); scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, i, 0); - scene_render->render_shadow(light->instance, p_shadow_atlas, i, geometry_instances_to_shadow_render); + shadow_data.light = light->instance; + shadow_data.pass = i; } } else { //shadow cube + if (max_shadows_used + 6 > MAX_UPDATE_SHADOWS) { + return true; + } + real_t radius = RSG::storage->light_get_param(p_instance->base, 
RS::LIGHT_PARAM_RANGE); CameraMatrix cm; cm.set_perspective(90, 1, 0.01, radius); @@ -1996,7 +2005,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons Vector<Plane> planes = cm.get_projection_planes(xform); - geometry_instances_to_shadow_render.clear(); instance_shadow_cull_result.clear(); Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size()); @@ -2015,7 +2023,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons p_scenario->indexers[Scenario::INDEXER_GEOMETRY].convex_query(planes.ptr(), planes.size(), points.ptr(), points.size(), cull_convex); - Plane near_plane(xform.origin, -xform.basis.get_axis(2)); + RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; @@ -2030,22 +2038,28 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons } } - geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); + shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); } RSG::storage->update_mesh_instances(); scene_render->light_instance_set_shadow_transform(light->instance, cm, xform, radius, 0, i, 0); - scene_render->render_shadow(light->instance, p_shadow_atlas, i, geometry_instances_to_shadow_render); + + shadow_data.light = light->instance; + shadow_data.pass = i; } //restore the regular DP matrix - scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, 0, 0); + //scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, 0, 0); } } break; case RS::LIGHT_SPOT: { RENDER_TIMESTAMP("Culling Spot Light"); + if (max_shadows_used + 1 > MAX_UPDATE_SHADOWS) { + return true; 
+ } + real_t radius = RSG::storage->light_get_param(p_instance->base, RS::LIGHT_PARAM_RANGE); real_t angle = RSG::storage->light_get_param(p_instance->base, RS::LIGHT_PARAM_SPOT_ANGLE); @@ -2054,7 +2068,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons Vector<Plane> planes = cm.get_projection_planes(light_transform); - geometry_instances_to_shadow_render.clear(); instance_shadow_cull_result.clear(); Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size()); @@ -2073,7 +2086,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons p_scenario->indexers[Scenario::INDEXER_GEOMETRY].convex_query(planes.ptr(), planes.size(), points.ptr(), points.size(), cull_convex); - Plane near_plane(light_transform.origin, -light_transform.basis.get_axis(2)); + RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; @@ -2088,13 +2101,14 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons RSG::storage->mesh_instance_check_for_update(instance->mesh_instance); } } - geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); + shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); } RSG::storage->update_mesh_instances(); scene_render->light_instance_set_shadow_transform(light->instance, cm, light_transform, radius, 0, 0, 0); - scene_render->render_shadow(light->instance, p_shadow_atlas, 0, geometry_instances_to_shadow_render); + shadow_data.light = light->instance; + shadow_data.pass = 0; } break; } @@ -2147,14 +2161,13 @@ void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_ RID environment = _render_get_environment(p_camera, p_scenario); - 
_prepare_scene(camera->transform, camera_matrix, ortho, camera->vaspect, p_render_buffers, environment, camera->visible_layers, p_scenario, p_shadow_atlas, RID(), p_screen_lod_threshold); - _render_scene(p_render_buffers, camera->transform, camera_matrix, ortho, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold); + _render_scene(camera->transform, camera_matrix, ortho, camera->vaspect, p_render_buffers, environment, camera->effects, camera->visible_layers, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold); #endif } void RendererSceneCull::render_camera(RID p_render_buffers, Ref<XRInterface> &p_interface, XRInterface::Eyes p_eye, RID p_camera, RID p_scenario, Size2 p_viewport_size, float p_screen_lod_threshold, RID p_shadow_atlas) { // render for AR/VR interface - +#if 0 Camera *camera = camera_owner.getornull(p_camera); ERR_FAIL_COND(!camera); @@ -2234,6 +2247,7 @@ void RendererSceneCull::render_camera(RID p_render_buffers, Ref<XRInterface> &p_ // And render our scene... 
_render_scene(p_render_buffers, cam_transform, camera_matrix, false, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold); +#endif }; void RendererSceneCull::_frustum_cull_threaded(uint32_t p_thread, FrustumCullData *cull_data) { @@ -2452,7 +2466,7 @@ void RendererSceneCull::_frustum_cull(FrustumCullData &cull_data, FrustumCullRes } } -void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows) { +void RendererSceneCull::_render_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, bool p_using_shadows) { // Note, in stereo rendering: // - p_cam_transform will be a transform in the middle of our two eyes // - p_cam_projection is a wider frustrum that encompasses both eyes @@ -2466,6 +2480,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca scene_render->set_scene_pass(render_pass); if (p_render_buffers.is_valid()) { + //no rendering code here, this is only to set up what needs to be done, request regions, etc. 
scene_render->sdfgi_update(p_render_buffers, p_environment, p_cam_transform.origin); //update conditions for SDFGI (whether its used or not) } @@ -2596,62 +2611,28 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca //render shadows - for (uint32_t i = 0; i < cull.shadow_count; i++) { - for (uint32_t j = 0; j < cull.shadows[i].cascade_count; j++) { - const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j]; - // print_line("shadow " + itos(i) + " cascade " + itos(j) + " elements: " + itos(c.cull_result.size())); - scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale); - scene_render->render_shadow(cull.shadows[i].light_instance, p_shadow_atlas, j, frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j], near_plane, p_cam_projection.get_lod_multiplier(), p_screen_lod_threshold); - } - } + max_shadows_used = 0; - //render SDFGI + if (p_using_shadows) { //setup shadow maps - { - if (cull.sdfgi.region_count > 0) { - //update regions - for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) { - scene_render->render_sdfgi(p_render_buffers, i, frustum_cull_result.sdfgi_region_geometry_instances[i]); - } - //check if static lights were culled - bool static_lights_culled = false; - for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) { - if (frustum_cull_result.sdfgi_cascade_lights[i].size()) { - static_lights_culled = true; - break; - } - } + // Directional Shadows - if (static_lights_culled) { - scene_render->render_sdfgi_static_lights(p_render_buffers, cull.sdfgi.cascade_light_count, cull.sdfgi.cascade_light_index, frustum_cull_result.sdfgi_cascade_lights); + for (uint32_t i = 0; i < cull.shadow_count; i++) { + for (uint32_t j = 0; j < cull.shadows[i].cascade_count; j++) { + const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j]; + // print_line("shadow " + itos(i) + " 
cascade " + itos(j) + " elements: " + itos(c.cull_result.size())); + scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale); + if (max_shadows_used == MAX_UPDATE_SHADOWS) { + continue; + } + render_shadow_data[max_shadows_used].light = cull.shadows[i].light_instance; + render_shadow_data[max_shadows_used].pass = j; + render_shadow_data[max_shadows_used].instances.merge_unordered(frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j]); + max_shadows_used++; } } - if (p_render_buffers.is_valid()) { - scene_render->sdfgi_update_probes(p_render_buffers, p_environment, directional_lights, scenario->dynamic_lights.ptr(), scenario->dynamic_lights.size()); - } - } - - //light_samplers_culled=0; - - /* - print_line("OT: "+rtos( (OS::get_singleton()->get_ticks_usec()-t)/1000.0)); - print_line("OTO: "+itos(p_scenario->octree.get_octant_count())); - print_line("OTE: "+itos(p_scenario->octree.get_elem_count())); - print_line("OTP: "+itos(p_scenario->octree.get_pair_count())); - */ - - /* STEP 3 - PROCESS PORTALS, VALIDATE ROOMS */ - //removed, will replace with culling - - /* STEP 4 - REMOVE FURTHER CULLED OBJECTS, ADD LIGHTS */ - - /* STEP 5 - PROCESS POSITIONAL LIGHTS */ - - if (p_using_shadows) { //setup shadow maps - - //SortArray<Instance*,_InstanceLightsort> sorter; - //sorter.sort(light_cull_result,light_cull_count); + // Positional Shadowss for (uint32_t i = 0; i < (uint32_t)frustum_cull_result.lights.size(); i++) { Instance *ins = frustum_cull_result.lights[i]; @@ -2738,12 +2719,49 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca bool redraw = scene_render->shadow_atlas_update_light(p_shadow_atlas, light->instance, coverage, light->last_version); - if (redraw) { + if (redraw && max_shadows_used < MAX_UPDATE_SHADOWS) { //must redraw! 
RENDER_TIMESTAMP(">Rendering Light " + itos(i)); light->shadow_dirty = _light_instance_update_shadow(ins, p_cam_transform, p_cam_projection, p_cam_orthogonal, p_cam_vaspect, p_shadow_atlas, scenario, p_screen_lod_threshold); RENDER_TIMESTAMP("<Rendering Light " + itos(i)); + } else { + light->shadow_dirty = redraw; + } + } + } + + //render SDFGI + + { + sdfgi_update_data.update_static = false; + + if (cull.sdfgi.region_count > 0) { + //update regions + for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) { + render_sdfgi_data[i].instances.merge_unordered(frustum_cull_result.sdfgi_region_geometry_instances[i]); + render_sdfgi_data[i].region = i; } + //check if static lights were culled + bool static_lights_culled = false; + for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) { + if (frustum_cull_result.sdfgi_cascade_lights[i].size()) { + static_lights_culled = true; + break; + } + } + + if (static_lights_culled) { + sdfgi_update_data.static_cascade_count = cull.sdfgi.cascade_light_count; + sdfgi_update_data.static_cascade_indices = cull.sdfgi.cascade_light_index; + sdfgi_update_data.static_positional_lights = frustum_cull_result.sdfgi_cascade_lights; + sdfgi_update_data.update_static = true; + } + } + + if (p_render_buffers.is_valid()) { + sdfgi_update_data.directional_lights = &directional_lights; + sdfgi_update_data.positional_light_instances = scenario->dynamic_lights.ptr(); + sdfgi_update_data.positional_light_count = scenario->dynamic_lights.size(); } } @@ -2751,6 +2769,28 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca for (int i = 0; i < directional_lights.size(); i++) { frustum_cull_result.light_instances.push_back(directional_lights[i]); } + + RID camera_effects; + if (p_force_camera_effects.is_valid()) { + camera_effects = p_force_camera_effects; + } else { + camera_effects = scenario->camera_effects; + } + /* PROCESS GEOMETRY AND DRAW SCENE */ + + RENDER_TIMESTAMP("Render Scene "); + 
scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold, render_shadow_data, max_shadows_used, render_sdfgi_data, cull.sdfgi.region_count, &sdfgi_update_data); + + for (uint32_t i = 0; i < max_shadows_used; i++) { + render_shadow_data[i].instances.clear(); + } + max_shadows_used = 0; + + for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) { + render_sdfgi_data[i].instances.clear(); + } + + // virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold,const RenderShadowData *p_render_shadows,int p_render_shadow_count,const RenderSDFGIData *p_render_sdfgi_regions,int p_render_sdfgi_region_count,const RenderSDFGIStaticLightData *p_render_sdfgi_static_lights=nullptr) = 0; } RID RendererSceneCull::_render_get_environment(RID p_camera, RID p_scenario) { @@ -2774,21 +2814,6 @@ RID RendererSceneCull::_render_get_environment(RID p_camera, RID p_scenario) { return RID(); } -void RendererSceneCull::_render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID 
p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) { - Scenario *scenario = scenario_owner.getornull(p_scenario); - - RID camera_effects; - if (p_force_camera_effects.is_valid()) { - camera_effects = p_force_camera_effects; - } else { - camera_effects = scenario->camera_effects; - } - /* PROCESS GEOMETRY AND DRAW SCENE */ - - RENDER_TIMESTAMP("Render Scene "); - scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold); -} - void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas) { #ifndef _3D_DISABLED @@ -2801,7 +2826,7 @@ void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, environment = scenario->fallback_environment; } RENDER_TIMESTAMP("Render Empty Scene "); - scene_render->render_scene(p_render_buffers, Transform(), CameraMatrix(), true, PagedArray<RendererSceneRender::GeometryInstance *>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), RID(), RID(), p_shadow_atlas, scenario->reflection_atlas, RID(), 0, 0); + scene_render->render_scene(p_render_buffers, Transform(), CameraMatrix(), true, PagedArray<RendererSceneRender::GeometryInstance *>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), RID(), RID(), p_shadow_atlas, scenario->reflection_atlas, RID(), 0, 0, nullptr, 0, nullptr, 0, nullptr); #endif } @@ -2864,8 +2889,7 @@ bool RendererSceneCull::_render_reflection_probe_step(Instance *p_instance, int } 
RENDER_TIMESTAMP("Render Reflection Probe, Step " + itos(p_step)); - _prepare_scene(xform, cm, false, false, RID(), RID(), RSG::storage->reflection_probe_get_cull_mask(p_instance->base), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, lod_threshold, use_shadows); - _render_scene(RID(), xform, cm, false, RID(), RID(), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, p_step, lod_threshold); + _render_scene(xform, cm, false, false, RID(), RID(), RID(), RSG::storage->reflection_probe_get_cull_mask(p_instance->base), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, p_step, lod_threshold, use_shadows); } else { //do roughness postprocess step until it believes it's done @@ -3493,7 +3517,12 @@ RendererSceneCull::RendererSceneCull() { instance_cull_result.set_page_pool(&instance_cull_page_pool); instance_shadow_cull_result.set_page_pool(&instance_cull_page_pool); - geometry_instances_to_shadow_render.set_page_pool(&geometry_instance_cull_page_pool); + for (uint32_t i = 0; i < MAX_UPDATE_SHADOWS; i++) { + render_shadow_data[i].instances.set_page_pool(&geometry_instance_cull_page_pool); + } + for (uint32_t i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + render_sdfgi_data[i].instances.set_page_pool(&geometry_instance_cull_page_pool); + } frustum_cull_result.init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool); frustum_cull_result_threads.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); @@ -3510,7 +3539,12 @@ RendererSceneCull::~RendererSceneCull() { instance_cull_result.reset(); instance_shadow_cull_result.reset(); - geometry_instances_to_shadow_render.reset(); + for (uint32_t i = 0; i < MAX_UPDATE_SHADOWS; i++) { + render_shadow_data[i].instances.reset(); + } + for (uint32_t i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + render_sdfgi_data[i].instances.reset(); + } frustum_cull_result.reset(); for 
(uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) { diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h index 2ffaf48675..a04e336f10 100644 --- a/servers/rendering/renderer_scene_cull.h +++ b/servers/rendering/renderer_scene_cull.h @@ -54,7 +54,8 @@ public: enum { SDFGI_MAX_CASCADES = 8, SDFGI_MAX_REGIONS_PER_CASCADE = 3, - MAX_INSTANCE_PAIRS = 32 + MAX_INSTANCE_PAIRS = 32, + MAX_UPDATE_SHADOWS = 512 }; uint64_t render_pass; @@ -696,7 +697,6 @@ public: PagedArray<Instance *> instance_cull_result; PagedArray<Instance *> instance_shadow_cull_result; - PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances_to_shadow_render; struct FrustumCullResult { PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances; @@ -816,6 +816,12 @@ public: FrustumCullResult frustum_cull_result; LocalVector<FrustumCullResult> frustum_cull_result_threads; + RendererSceneRender::RenderShadowData render_shadow_data[MAX_UPDATE_SHADOWS]; + uint32_t max_shadows_used = 0; + + RendererSceneRender::RenderSDFGIData render_sdfgi_data[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; + RendererSceneRender::RenderSDFGIUpdateData sdfgi_update_data; + uint32_t thread_cull_threshold = 200; RID_PtrOwner<Instance> instance_owner; @@ -924,8 +930,7 @@ public: void _frustum_cull(FrustumCullData &cull_data, FrustumCullResult &cull_result, uint64_t p_from, uint64_t p_to); bool _render_reflection_probe_step(Instance *p_instance, int p_step); - void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows = true); - void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID 
p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); + void _render_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, bool p_using_shadows = true); void render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas); void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, Size2 p_viewport_size, float p_screen_lod_threshold, RID p_shadow_atlas); diff --git a/servers/rendering/renderer_scene_render.h b/servers/rendering/renderer_scene_render.h index ecec03db94..015327f9d9 100644 --- a/servers/rendering/renderer_scene_render.h +++ b/servers/rendering/renderer_scene_render.h @@ -87,7 +87,6 @@ public: virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const = 0; virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const = 0; virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const = 0; - virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) = 0; /* SKY API */ @@ -195,12 +194,31 @@ public: virtual void gi_probe_set_quality(RS::GIProbeQuality) = 0; - virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID 
p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) = 0; + struct RenderShadowData { + RID light; + int pass = 0; + PagedArray<GeometryInstance *> instances; + }; + + struct RenderSDFGIData { + int region = 0; + PagedArray<GeometryInstance *> instances; + }; + + struct RenderSDFGIUpdateData { + bool update_static = false; + uint32_t static_cascade_count; + uint32_t *static_cascade_indices; + PagedArray<RID> *static_positional_lights; + + const Vector<RID> *directional_lights; + const RID *positional_light_instances; + uint32_t positional_light_count; + }; + + virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr) = 0; - virtual void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0) = 0; virtual void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; - virtual void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) = 0; - virtual void 
render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_lights) = 0; virtual void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances) = 0; virtual void set_scene_pass(uint64_t p_pass) = 0; diff --git a/servers/rendering/renderer_storage.h b/servers/rendering/renderer_storage.h index 7a80c2b0bf..f015b50eee 100644 --- a/servers/rendering/renderer_storage.h +++ b/servers/rendering/renderer_storage.h @@ -98,6 +98,7 @@ public: while (to_clean_up.size()) { to_clean_up.front()->get().first->instances.erase(to_clean_up.front()->get().second); + dependencies.erase(to_clean_up.front()->get().first); to_clean_up.pop_front(); } } diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 70497bcdb3..4b0eafe369 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -240,10 +240,6 @@ void RenderingDevice::_compute_list_set_push_constant(ComputeListID p_list, cons compute_list_set_push_constant(p_list, p_data.ptr(), p_data_size); } -void RenderingDevice::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group) { - compute_list_dispatch(p_list, (p_x_threads - 1) / p_x_local_group + 1, (p_y_threads - 1) / p_y_local_group + 1, (p_z_threads - 1) / p_z_local_group + 1); -} - void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("texture_create", "format", "view", "data"), &RenderingDevice::_texture_create, DEFVAL(Array())); ClassDB::bind_method(D_METHOD("texture_create_shared", "view", "with_texture"), &RenderingDevice::_texture_create_shared); @@ -319,7 +315,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_end", "post_barrier"), 
&RenderingDevice::draw_list_end, DEFVAL(BARRIER_MASK_ALL)); - ClassDB::bind_method(D_METHOD("compute_list_begin"), &RenderingDevice::compute_list_begin); + ClassDB::bind_method(D_METHOD("compute_list_begin", "allow_draw_overlap"), &RenderingDevice::compute_list_begin, DEFVAL(false)); ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline); ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant); ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set); @@ -352,10 +348,15 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_command_insert_label", "name", "color"), &RenderingDevice::draw_command_insert_label); ClassDB::bind_method(D_METHOD("draw_command_end_label"), &RenderingDevice::draw_command_end_label); + ClassDB::bind_method(D_METHOD("get_device_vendor_name"), &RenderingDevice::get_device_vendor_name); + ClassDB::bind_method(D_METHOD("get_device_name"), &RenderingDevice::get_device_name); + ClassDB::bind_method(D_METHOD("get_device_pipeline_cache_uuid"), &RenderingDevice::get_device_pipeline_cache_uuid); + BIND_CONSTANT(BARRIER_MASK_RASTER); BIND_CONSTANT(BARRIER_MASK_COMPUTE); BIND_CONSTANT(BARRIER_MASK_TRANSFER); BIND_CONSTANT(BARRIER_MASK_ALL); + BIND_CONSTANT(BARRIER_MASK_NO_BARRIER); BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4_UNORM_PACK8); BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4B4A4_UNORM_PACK16); @@ -760,6 +761,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR); //start rendering and clear the framebuffer (supply params) BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION); //start rendering and clear the framebuffer (supply params) + BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION_CONTINUE); //continue rendering 
and clear the framebuffer (supply params) BIND_ENUM_CONSTANT(INITIAL_ACTION_KEEP); //start rendering); but keep attached color texture contents (depth will be cleared) BIND_ENUM_CONSTANT(INITIAL_ACTION_DROP); //start rendering); ignore what is there); just write above it BIND_ENUM_CONSTANT(INITIAL_ACTION_CONTINUE); //continue rendering (framebuffer must have been left in "continue" state as final action previously) diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 47ef54cef7..9fbf58d131 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -343,6 +343,7 @@ public: BARRIER_MASK_RASTER = 1, BARRIER_MASK_COMPUTE = 2, BARRIER_MASK_TRANSFER = 4, + BARRIER_MASK_NO_BARRIER = 8, BARRIER_MASK_ALL = BARRIER_MASK_RASTER | BARRIER_MASK_COMPUTE | BARRIER_MASK_TRANSFER }; @@ -944,6 +945,7 @@ public: enum InitialAction { INITIAL_ACTION_CLEAR, //start rendering and clear the whole framebuffer (region or not) (supply params) INITIAL_ACTION_CLEAR_REGION, //start rendering and clear the framebuffer in the specified region (supply params) + INITIAL_ACTION_CLEAR_REGION_CONTINUE, //countinue rendering and clear the framebuffer in the specified region (supply params) INITIAL_ACTION_KEEP, //start rendering, but keep attached color texture contents (depth will be cleared) INITIAL_ACTION_DROP, //start rendering, ignore what is there, just write above it INITIAL_ACTION_CONTINUE, //continue rendering (framebuffer must have been left in "continue" state as final action previously) @@ -983,12 +985,12 @@ public: typedef int64_t ComputeListID; - virtual ComputeListID compute_list_begin() = 0; + virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false) = 0; virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) = 0; virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) = 0; virtual void 
compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) = 0; virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0; - virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group); + virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) = 0; virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) = 0; virtual void compute_list_add_barrier(ComputeListID p_list) = 0; @@ -1078,6 +1080,10 @@ public: virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)) = 0; virtual void draw_command_end_label() = 0; + virtual String get_device_vendor_name() const = 0; + virtual String get_device_name() const = 0; + virtual String get_device_pipeline_cache_uuid() const = 0; + static RenderingDevice *get_singleton(); RenderingDevice(); |