summaryrefslogtreecommitdiff
path: root/thirdparty
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty')
-rw-r--r--thirdparty/README.md2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_comp.cpp178
-rw-r--r--thirdparty/basis_universal/encoder/basisu_comp.h21
-rw-r--r--thirdparty/basis_universal/encoder/basisu_enc.cpp4
-rw-r--r--thirdparty/basis_universal/encoder/basisu_frontend.cpp4
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder.cpp2
-rw-r--r--thirdparty/embree/common/sys/sysinfo.cpp24
-rw-r--r--thirdparty/embree/patches/emscripten-nthreads.patch42
8 files changed, 233 insertions, 44 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 937d149747..ed38fe0ec8 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -20,7 +20,7 @@ Files extracted from upstream source:
## basis_universal
- Upstream: https://github.com/BinomialLLC/basis_universal
-- Version: git (1531cfaf9ed5232248a0a45736686a849ca3befc, 2022)
+- Version: git (a91e94c8495d7f470d3df326a364d49324cfd4a3, 2022)
- License: Apache 2.0
Files extracted from upstream source:
diff --git a/thirdparty/basis_universal/encoder/basisu_comp.cpp b/thirdparty/basis_universal/encoder/basisu_comp.cpp
index 166a1c4fe0..41eae2b78a 100644
--- a/thirdparty/basis_universal/encoder/basisu_comp.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp
@@ -1501,7 +1501,8 @@ namespace basisu
if (m_params.m_compute_stats)
{
- printf("Slice: %u\n", slice_index);
+ if (m_params.m_print_stats)
+ printf("Slice: %u\n", slice_index);
image_stats& s = m_stats[slice_index];
@@ -1511,81 +1512,100 @@ namespace basisu
// ---- .basis stats
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3);
- em.print(".basis RGB Avg: ");
+ if (m_params.m_print_stats)
+ em.print(".basis RGB Avg: ");
s.m_basis_rgb_avg_psnr = em.m_psnr;
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4);
- em.print(".basis RGBA Avg: ");
+ if (m_params.m_print_stats)
+ em.print(".basis RGBA Avg: ");
s.m_basis_rgba_avg_psnr = em.m_psnr;
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1);
- em.print(".basis R Avg: ");
+ if (m_params.m_print_stats)
+ em.print(".basis R Avg: ");
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1);
- em.print(".basis G Avg: ");
+ if (m_params.m_print_stats)
+ em.print(".basis G Avg: ");
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1);
- em.print(".basis B Avg: ");
+ if (m_params.m_print_stats)
+ em.print(".basis B Avg: ");
if (m_params.m_uastc)
{
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1);
- em.print(".basis A Avg: ");
+ if (m_params.m_print_stats)
+ em.print(".basis A Avg: ");
s.m_basis_a_avg_psnr = em.m_psnr;
}
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0);
- em.print(".basis 709 Luma: ");
+ if (m_params.m_print_stats)
+ em.print(".basis 709 Luma: ");
s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr);
s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim);
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true);
- em.print(".basis 601 Luma: ");
+ if (m_params.m_print_stats)
+ em.print(".basis 601 Luma: ");
s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr);
if (m_slice_descs.size() == 1)
{
const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size();
- debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
- debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ if (m_params.m_print_stats)
+ {
+ debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ }
}
if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width())
{
// ---- BC7 stats
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3);
- em.print("BC7 RGB Avg: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 RGB Avg: ");
s.m_bc7_rgb_avg_psnr = em.m_psnr;
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4);
- em.print("BC7 RGBA Avg: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 RGBA Avg: ");
s.m_bc7_rgba_avg_psnr = em.m_psnr;
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1);
- em.print("BC7 R Avg: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 R Avg: ");
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1);
- em.print("BC7 G Avg: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 G Avg: ");
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1);
- em.print("BC7 B Avg: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 B Avg: ");
if (m_params.m_uastc)
{
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1);
- em.print("BC7 A Avg: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 A Avg: ");
s.m_bc7_a_avg_psnr = em.m_psnr;
}
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0);
- em.print("BC7 709 Luma: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 709 Luma: ");
s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr);
s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim);
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true);
- em.print("BC7 601 Luma: ");
+ if (m_params.m_print_stats)
+ em.print("BC7 601 Luma: ");
s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr);
}
@@ -1593,16 +1613,19 @@ namespace basisu
{
// ---- Nearly best possible ETC1S stats
em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3);
- em.print("Unquantized ETC1S RGB Avg: ");
+ if (m_params.m_print_stats)
+ em.print("Unquantized ETC1S RGB Avg: ");
s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr);
em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0);
- em.print("Unquantized ETC1S 709 Luma: ");
+ if (m_params.m_print_stats)
+ em.print("Unquantized ETC1S 709 Luma: ");
s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr);
s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim);
em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true);
- em.print("Unquantized ETC1S 601 Luma: ");
+ if (m_params.m_print_stats)
+ em.print("Unquantized ETC1S 601 Luma: ");
s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr);
}
}
@@ -2311,6 +2334,8 @@ namespace basisu
}
comp_params.m_compute_stats = (pStats != nullptr);
+ comp_params.m_print_stats = (flags_and_quality & cFlagPrintStats) != 0;
+ comp_params.m_status_output = (flags_and_quality & cFlagPrintStatus) != 0;
// Create the compressor, initialize it, and process the input
basis_compressor comp;
@@ -2328,6 +2353,11 @@ namespace basisu
return nullptr;
}
+ if ((pStats) && (comp.get_opencl_failed()))
+ {
+ pStats->m_opencl_failed = true;
+ }
+
// Get the output file data and return it to the caller
void* pFile_data = nullptr;
const uint8_vec* pFile_data_vec = comp_params.m_create_ktx2_file ? &comp.get_output_ktx2_file() : &comp.get_output_basis_file();
@@ -2388,4 +2418,108 @@ namespace basisu
free(p);
}
+ bool basis_benchmark_etc1s_opencl(bool* pOpenCL_failed)
+ {
+ if (pOpenCL_failed)
+ *pOpenCL_failed = false;
+
+ if (!opencl_is_available())
+ {
+ error_printf("basis_benchmark_etc1s_opencl: OpenCL support must be enabled first!\n");
+ return false;
+ }
+
+ const uint32_t W = 1024, H = 1024;
+ basisu::vector<image> images;
+ image& img = images.enlarge(1)->resize(W, H);
+
+ const uint32_t NUM_RAND_LETTERS = 6000;// 40000;
+
+ rand r;
+ r.seed(200);
+
+ for (uint32_t i = 0; i < NUM_RAND_LETTERS; i++)
+ {
+ uint32_t x = r.irand(0, W - 1), y = r.irand(0, H - 1);
+ uint32_t sx = r.irand(1, 4), sy = r.irand(1, 4);
+ color_rgba c(r.byte(), r.byte(), r.byte(), 255);
+
+ img.debug_text(x, y, sx, sy, c, nullptr, false, "%c", static_cast<char>(r.irand(32, 127)));
+ }
+
+ //save_png("test.png", img);
+
+ image_stats stats;
+
+ uint32_t flags_and_quality = cFlagSRGB | cFlagThreaded | 255;
+ size_t comp_size = 0;
+
+ double best_cpu_time = 1e+9f, best_gpu_time = 1e+9f;
+
+ const uint32_t TIMES_TO_ENCODE = 2;
+ interval_timer tm;
+
+ for (uint32_t i = 0; i < TIMES_TO_ENCODE; i++)
+ {
+ tm.start();
+ void* pComp_data = basis_compress(
+ images,
+ flags_and_quality, 1.0f,
+ &comp_size,
+ &stats);
+ double cpu_time = tm.get_elapsed_secs();
+ if (!pComp_data)
+ {
+ error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (CPU)!\n");
+ return false;
+ }
+
+ best_cpu_time = minimum(best_cpu_time, cpu_time);
+
+ basis_free_data(pComp_data);
+ }
+
+ printf("Best CPU time: %3.3f\n", best_cpu_time);
+
+ for (uint32_t i = 0; i < TIMES_TO_ENCODE; i++)
+ {
+ tm.start();
+ void* pComp_data = basis_compress(
+ images,
+ flags_and_quality | cFlagUseOpenCL, 1.0f,
+ &comp_size,
+ &stats);
+
+ if (stats.m_opencl_failed)
+ {
+ error_printf("basis_benchmark_etc1s_opencl: OpenCL failed!\n");
+
+ basis_free_data(pComp_data);
+
+ if (pOpenCL_failed)
+ *pOpenCL_failed = true;
+
+ return false;
+ }
+
+ double gpu_time = tm.get_elapsed_secs();
+ if (!pComp_data)
+ {
+ error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (GPU)!\n");
+ return false;
+ }
+
+ best_gpu_time = minimum(best_gpu_time, gpu_time);
+
+ basis_free_data(pComp_data);
+ }
+
+ printf("Best GPU time: %3.3f\n", best_gpu_time);
+
+ return best_gpu_time < best_cpu_time;
+ }
+
} // namespace basisu
+
+
+
diff --git a/thirdparty/basis_universal/encoder/basisu_comp.h b/thirdparty/basis_universal/encoder/basisu_comp.h
index aa5ea6fec3..b6c9fef9e2 100644
--- a/thirdparty/basis_universal/encoder/basisu_comp.h
+++ b/thirdparty/basis_universal/encoder/basisu_comp.h
@@ -92,6 +92,8 @@ namespace basisu
m_best_etc1s_luma_709_psnr = 0.0f;
m_best_etc1s_luma_601_psnr = 0.0f;
m_best_etc1s_luma_709_ssim = 0.0f;
+
+ m_opencl_failed = false;
}
std::string m_filename;
@@ -119,6 +121,8 @@ namespace basisu
float m_best_etc1s_luma_709_psnr;
float m_best_etc1s_luma_601_psnr;
float m_best_etc1s_luma_709_ssim;
+
+ bool m_opencl_failed;
};
template<bool def>
@@ -255,6 +259,7 @@ namespace basisu
m_write_output_basis_files.clear();
m_compression_level.clear();
m_compute_stats.clear();
+ m_print_stats.clear();
m_check_for_alpha.clear();
m_force_alpha.clear();
m_multithreading.clear();
@@ -373,6 +378,9 @@ namespace basisu
// Compute and display image metrics
bool_param<false> m_compute_stats;
+
+ // Print stats to stdout, if m_compute_stats is true.
+ bool_param<true> m_print_stats;
// Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels
bool_param<true> m_check_for_alpha;
@@ -583,11 +591,16 @@ namespace basisu
cFlagYFlip = 1 << 16, // flip source image on Y axis before compression
cFlagUASTC = 1 << 17, // use UASTC compression vs. ETC1S
- cFlagUASTCRDO = 1 << 18 // use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar)
+ cFlagUASTCRDO = 1 << 18, // use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar)
+
+ cFlagPrintStats = 1 << 19, // print image stats to stdout
+ cFlagPrintStatus = 1 << 20 // print status to stdout
};
// This function accepts an array of source images.
// If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled.
+ // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. The returned block must be freed using basis_free_data().
+ // basisu_encoder_init() MUST be called first!
void* basis_compress(
const basisu::vector<image> &source_images,
uint32_t flags_and_quality, float uastc_rdo_quality,
@@ -604,6 +617,12 @@ namespace basisu
// Frees the dynamically allocated file data returned by basis_compress().
void basis_free_data(void* p);
+ // Runs a short benchmark using synthetic image data to time OpenCL encoding vs. CPU encoding, with multithreading enabled.
+ // Returns true if opencl is worth using on this system, otherwise false.
+ // If pOpenCL_failed is not null, it will be set to true if OpenCL encoding failed *on this particular machine/driver/BasisU version* and the encoder falled back to CPU encoding.
+ // basisu_encoder_init() MUST be called first. If OpenCL support wasn't enabled this always returns false.
+ bool basis_benchmark_etc1s_opencl(bool *pOpenCL_failed = nullptr);
+
// Parallel compression API
struct parallel_results
{
diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
index b427215ee3..c431ceaf12 100644
--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
@@ -187,6 +187,8 @@ namespace basisu
opencl_init(opencl_force_serialization);
}
+ interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports
+
g_library_initialized = true;
}
@@ -227,7 +229,7 @@ namespace basisu
{
QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks));
}
-#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)
+#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__EMSCRIPTEN__)
#include <sys/time.h>
inline void query_counter(timer_ticks* pTicks)
{
diff --git a/thirdparty/basis_universal/encoder/basisu_frontend.cpp b/thirdparty/basis_universal/encoder/basisu_frontend.cpp
index 00210e6679..1f30a33c70 100644
--- a/thirdparty/basis_universal/encoder/basisu_frontend.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_frontend.cpp
@@ -2328,8 +2328,6 @@ namespace basisu
m_optimized_cluster_selectors.resize(total_selector_clusters);
- uint32_t total_clusters_processed = 0;
-
// For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector.
const uint32_t N = 256;
for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N)
@@ -2338,7 +2336,7 @@ namespace basisu
const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N);
#ifndef __EMSCRIPTEN__
- m_params.m_pJob_pool->add_job([this, first_index, last_index, &total_clusters_processed, &total_selector_clusters] {
+ m_params.m_pJob_pool->add_job([this, first_index, last_index] {
#endif
for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
index 630731900f..3aeba0ee7a 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
@@ -16867,7 +16867,7 @@ namespace basist
{
m_format = basist::basis_tex_format::cETC1S;
- // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD’s sample count."
+ // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count."
// If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that.
m_has_alpha = (m_header.m_dfd_byte_length == 60);
diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp
index c98f61fa53..7f7a009a1e 100644
--- a/thirdparty/embree/common/sys/sysinfo.cpp
+++ b/thirdparty/embree/common/sys/sysinfo.cpp
@@ -640,6 +640,12 @@ namespace embree
#if defined(__EMSCRIPTEN__)
#include <emscripten.h>
+
+// -- GODOT start --
+extern "C" {
+extern int godot_js_os_hw_concurrency_get();
+}
+// -- GODOT end --
#endif
namespace embree
@@ -653,21 +659,9 @@ namespace embree
nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
assert(nThreads);
#elif defined(__EMSCRIPTEN__)
- // WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
- // threads from the browser or Node.js using JavaScript.
- nThreads = MAIN_THREAD_EM_ASM_INT({
- const isBrowser = typeof window !== 'undefined';
- const isNode = typeof process !== 'undefined' && process.versions != null &&
- process.versions.node != null;
- if (isBrowser) {
- // Return 1 if the browser does not expose hardwareConcurrency.
- return window.navigator.hardwareConcurrency || 1;
- } else if (isNode) {
- return require('os').cpus().length;
- } else {
- return 1;
- }
- });
+ // -- GODOT start --
+ nThreads = godot_js_os_hw_concurrency_get();
+ // -- GODOT end --
#else
cpu_set_t set;
if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
diff --git a/thirdparty/embree/patches/emscripten-nthreads.patch b/thirdparty/embree/patches/emscripten-nthreads.patch
new file mode 100644
index 0000000000..e42f203475
--- /dev/null
+++ b/thirdparty/embree/patches/emscripten-nthreads.patch
@@ -0,0 +1,42 @@
+diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp
+index c98f61fa53..7f7a009a1e 100644
+--- a/thirdparty/embree/common/sys/sysinfo.cpp
++++ b/thirdparty/embree/common/sys/sysinfo.cpp
+@@ -640,6 +640,12 @@ namespace embree
+
+ #if defined(__EMSCRIPTEN__)
+ #include <emscripten.h>
++
++// -- GODOT start --
++extern "C" {
++extern int godot_js_os_hw_concurrency_get();
++}
++// -- GODOT end --
+ #endif
+
+ namespace embree
+@@ -653,21 +659,9 @@ namespace embree
+ nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
+ assert(nThreads);
+ #elif defined(__EMSCRIPTEN__)
+- // WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
+- // threads from the browser or Node.js using JavaScript.
+- nThreads = MAIN_THREAD_EM_ASM_INT({
+- const isBrowser = typeof window !== 'undefined';
+- const isNode = typeof process !== 'undefined' && process.versions != null &&
+- process.versions.node != null;
+- if (isBrowser) {
+- // Return 1 if the browser does not expose hardwareConcurrency.
+- return window.navigator.hardwareConcurrency || 1;
+- } else if (isNode) {
+- return require('os').cpus().length;
+- } else {
+- return 1;
+- }
+- });
++ // -- GODOT start --
++ nThreads = godot_js_os_hw_concurrency_get();
++ // -- GODOT end --
+ #else
+ cpu_set_t set;
+ if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)